├── .idea ├── .name ├── copyright │ ├── profiles_settings.xml │ └── libble.xml ├── encodings.xml ├── vcs.xml ├── scala_compiler.xml ├── libraries │ ├── Maven__oro_oro_2_0_8.xml │ ├── Maven__junit_junit_4_12.xml │ ├── Maven__org_tukaani_xz_1_0.xml │ ├── Maven__log4j_log4j_1_2_17.xml │ ├── Maven__xmlenc_xmlenc_0_52.xml │ ├── Maven__net_sf_py4j_py4j_0_10_3.xml │ ├── Maven__org_apache_ivy_ivy_2_4_0.xml │ ├── Maven__net_jpountz_lz4_lz4_1_3_0.xml │ ├── Maven__org_apache_avro_avro_1_7_7.xml │ ├── Maven__com_google_inject_guice_3_0.xml │ ├── Maven__net_razorvine_pyrolite_4_9.xml │ ├── Maven__net_sf_opencsv_opencsv_2_3.xml │ ├── Maven__commons_io_commons_io_2_1.xml │ ├── Maven__org_objenesis_objenesis_2_1.xml │ ├── Maven__aopalliance_aopalliance_1_0.xml │ ├── Maven__com_google_guava_guava_14_0_1.xml │ ├── Maven__commons_cli_commons_cli_1_2.xml │ ├── Maven__commons_net_commons_net_2_2.xml │ ├── Maven__io_netty_netty_3_8_0_Final.xml │ ├── Maven__javax_inject_javax_inject_1.xml │ ├── Maven__org_scala_lang_scalap_2_11_8.xml │ ├── Maven__org_slf4j_slf4j_api_1_7_21.xml │ ├── Maven__org_jpmml_pmml_model_1_2_15.xml │ ├── Maven__com_ning_compress_lzf_1_0_3.xml │ ├── Maven__com_twitter_chill_2_11_0_8_0.xml │ ├── Maven__com_twitter_chill_java_0_8_0.xml │ ├── Maven__commons_lang_commons_lang_2_5.xml │ ├── Maven__net_java_dev_jets3t_jets3t_0_7_1.xml │ ├── Maven__org_apache_avro_avro_ipc_1_7_7.xml │ ├── Maven__org_codehaus_janino_janino_2_7_8.xml │ ├── Maven__org_jpmml_pmml_schema_1_2_15.xml │ ├── Maven__com_esotericsoftware_minlog_1_3_0.xml │ ├── Maven__org_hamcrest_hamcrest_core_1_3.xml │ ├── Maven__com_github_fommil_netlib_core_1_1_2.xml │ ├── Maven__commons_codec_commons_codec_1_3.xml │ ├── Maven__org_slf4j_jul_to_slf4j_1_7_16.xml │ ├── Maven__org_spire_math_spire_2_11_0_7_4.xml │ ├── Maven__com_github_rwl_jtransforms_2_4_0.xml │ ├── Maven__org_antlr_antlr4_runtime_4_5_3.xml │ ├── Maven__org_scalanlp_breeze_2_11_0_11_2.xml │ ├── Maven__org_slf4j_slf4j_log4j12_1_7_21.xml │ ├── 
Maven__org_scala_sbt_test_interface_1_0.xml │ ├── Maven__org_spark_project_spark_unused_1_0_0.xml │ ├── Maven__com_google_code_findbugs_jsr305_1_3_9.xml │ ├── Maven__io_netty_netty_all_4_0_29_Final.xml │ ├── Maven__org_apache_avro_avro_ipc_tests_1_7_7.xml │ ├── Maven__org_slf4j_jcl_over_slf4j_1_7_16.xml │ ├── Maven__com_clearspring_analytics_stream_2_7_0.xml │ ├── Maven__com_novocode_junit_interface_0_11.xml │ ├── Maven__org_apache_commons_commons_math_2_1.xml │ ├── Maven__org_apache_hadoop_hadoop_auth_2_2_0.xml │ ├── Maven__org_apache_hadoop_hadoop_hdfs_2_2_0.xml │ ├── Maven__org_apache_zookeeper_zookeeper_3_4_5.xml │ ├── Maven__org_glassfish_hk2_hk2_api_2_4_0_b34.xml │ ├── Maven__org_javassist_javassist_3_18_1_GA.xml │ ├── Maven__org_mortbay_jetty_jetty_util_6_1_26.xml │ ├── Maven__javax_ws_rs_javax_ws_rs_api_2_0_1.xml │ ├── Maven__org_apache_commons_commons_math3_3_2.xml │ ├── Maven__org_apache_mesos_mesos_shaded_protobuf_0_21_1.xml │ ├── Maven__org_json4s_json4s_ast_2_11_3_2_11.xml │ ├── Maven__org_scala_lang_scala_actors_2_11_8.xml │ ├── Maven__org_scalatest_scalatest_2_11_2_2_6.xml │ ├── Maven__org_scala_lang_scala_reflect_2_11_8.xml │ ├── Maven__com_esotericsoftware_kryo_shaded_3_0_3.xml │ ├── Maven__com_thoughtworks_paranamer_paranamer_2_6.xml │ ├── Maven__org_apache_avro_avro_mapred_hadoop2_1_7_7.xml │ ├── Maven__org_json4s_json4s_core_2_11_3_2_11.xml │ ├── Maven__commons_digester_commons_digester_1_8.xml │ ├── Maven__io_dropwizard_metrics_metrics_jvm_3_1_2.xml │ ├── Maven__org_apache_hadoop_hadoop_client_2_2_0.xml │ ├── Maven__org_apache_hadoop_hadoop_common_2_2_0.xml │ ├── Maven__org_apache_spark_spark_sql_2_11_2_0_1.xml │ ├── Maven__org_glassfish_hk2_hk2_utils_2_4_0_b34.xml │ ├── Maven__org_scala_lang_scala_compiler_2_11_8.xml │ ├── Maven__org_xerial_snappy_snappy_java_1_1_2_6.xml │ ├── Maven__org_apache_commons_commons_lang3_3_3_2.xml │ ├── Maven__org_roaringbitmap_RoaringBitmap_0_5_11.xml │ ├── Maven__com_google_protobuf_protobuf_java_2_5_0.xml │ ├── 
Maven__com_univocity_univocity_parsers_2_1_1.xml │ ├── Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml │ ├── Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml │ ├── Maven__javax_servlet_javax_servlet_api_3_1_0.xml │ ├── Maven__org_apache_curator_curator_client_2_4_0.xml │ ├── Maven__org_apache_parquet_parquet_column_1_7_0.xml │ ├── Maven__org_apache_parquet_parquet_common_1_7_0.xml │ ├── Maven__org_apache_parquet_parquet_hadoop_1_7_0.xml │ ├── Maven__org_apache_spark_spark_core_2_11_2_0_1.xml │ ├── Maven__org_apache_spark_spark_tags_2_11_2_0_1.xml │ ├── Maven__org_apache_xbean_xbean_asm5_shaded_4_4.xml │ ├── Maven__org_apache_hadoop_hadoop_yarn_api_2_2_0.xml │ ├── Maven__org_apache_spark_spark_mllib_2_11_2_0_1.xml │ ├── Maven__org_glassfish_hk2_hk2_locator_2_4_0_b34.xml │ ├── Maven__org_spire_math_spire_macros_2_11_0_7_4.xml │ ├── Maven__org_apache_curator_curator_recipes_2_4_0.xml │ ├── Maven__org_apache_parquet_parquet_jackson_1_7_0.xml │ ├── Maven__org_json4s_json4s_jackson_2_11_3_2_11.xml │ ├── Maven__org_scalanlp_breeze_macros_2_11_0_11_2.xml │ ├── Maven__commons_httpclient_commons_httpclient_3_1.xml │ ├── Maven__org_apache_commons_commons_compress_1_4_1.xml │ ├── Maven__org_apache_parquet_parquet_encoding_1_7_0.xml │ ├── Maven__org_apache_spark_spark_graphx_2_11_2_0_1.xml │ ├── Maven__org_apache_spark_spark_sketch_2_11_2_0_1.xml │ ├── Maven__org_apache_spark_spark_unsafe_2_11_2_0_1.xml │ ├── Maven__commons_beanutils_commons_beanutils_1_7_0.xml │ ├── Maven__org_codehaus_janino_commons_compiler_2_7_8.xml │ ├── Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml │ ├── Maven__org_scala_lang_modules_scala_xml_2_11_1_0_4.xml │ ├── Maven__com_fasterxml_jackson_core_jackson_core_2_6_5.xml │ ├── Maven__javax_annotation_javax_annotation_api_1_2.xml │ ├── Maven__org_apache_curator_curator_framework_2_4_0.xml │ ├── Maven__org_apache_parquet_parquet_generator_1_7_0.xml │ ├── Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml │ ├── 
Maven__net_sourceforge_f2j_arpack_combined_all_0_1.xml │ ├── Maven__org_apache_hadoop_hadoop_annotations_2_2_0.xml │ ├── Maven__org_apache_hadoop_hadoop_yarn_client_2_2_0.xml │ ├── Maven__org_apache_hadoop_hadoop_yarn_common_2_2_0.xml │ ├── Maven__org_apache_spark_spark_catalyst_2_11_2_0_1.xml │ ├── Maven__org_apache_spark_spark_launcher_2_11_2_0_1.xml │ ├── Maven__org_codehaus_jackson_jackson_core_asl_1_9_13.xml │ ├── Maven__org_glassfish_jersey_core_jersey_client_2_22_2.xml │ ├── Maven__org_glassfish_jersey_core_jersey_common_2_22_2.xml │ ├── Maven__org_glassfish_jersey_core_jersey_server_2_22_2.xml │ ├── Maven__javax_validation_validation_api_1_1_0_Final.xml │ ├── Maven__org_apache_spark_spark_streaming_2_11_2_0_1.xml │ ├── Maven__commons_collections_commons_collections_3_2_1.xml │ ├── Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_13.xml │ ├── Maven__org_glassfish_hk2_osgi_resource_locator_1_0_1.xml │ ├── Maven__com_fasterxml_jackson_core_jackson_databind_2_6_5.xml │ ├── Maven__commons_configuration_commons_configuration_1_6.xml │ ├── Maven__org_apache_spark_spark_mllib_local_2_11_2_0_1.xml │ ├── Maven__org_glassfish_hk2_external_javax_inject_2_4_0_b34.xml │ ├── Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml │ ├── Maven__org_glassfish_jersey_media_jersey_media_jaxb_2_22_2.xml │ ├── Maven__com_fasterxml_jackson_core_jackson_annotations_2_6_5.xml │ ├── Maven__org_apache_spark_spark_network_common_2_11_2_0_1.xml │ ├── Maven__org_apache_hadoop_hadoop_yarn_server_common_2_2_0.xml │ ├── Maven__org_apache_spark_spark_network_shuffle_2_11_2_0_1.xml │ ├── Maven__org_apache_parquet_parquet_format_2_3_0_incubating.xml │ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_app_2_2_0.xml │ ├── Maven__org_glassfish_jersey_bundles_repackaged_jersey_guava_2_22_2.xml │ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_core_2_2_0.xml │ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_common_2_2_0.xml │ ├── 
Maven__com_fasterxml_jackson_module_jackson_module_paranamer_2_6_5.xml │ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_shuffle_2_2_0.xml │ ├── Maven__org_glassfish_hk2_external_aopalliance_repackaged_2_4_0_b34.xml │ ├── Maven__com_fasterxml_jackson_module_jackson_module_scala_2_11_2_6_5.xml │ ├── Maven__org_scala_lang_modules_scala_parser_combinators_2_11_1_0_4.xml │ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_jobclient_2_2_0.xml │ ├── Maven__org_glassfish_jersey_containers_jersey_container_servlet_2_22_2.xml │ ├── Maven__org_glassfish_jersey_containers_jersey_container_servlet_core_2_22_2.xml │ └── Maven__org_scala_lang_scala_library_2_11_8.xml ├── modules.xml ├── misc.xml ├── compiler.xml └── uiDesigner.xml ├── README.md ├── data └── testMF.txt └── src ├── main └── scala │ ├── utils │ ├── WorkerStore.scala │ └── XORShiftRandom.scala │ ├── linalg │ ├── package.scala │ └── Vector.scala │ ├── context │ ├── Instance.scala │ └── implicits.scala │ ├── examples │ ├── LoadFile.scala │ ├── testScaller.scala │ ├── testKMeans.scala │ ├── testLR.scala │ ├── testSVD.scala │ ├── testPCA.scala │ └── testCF.scala │ ├── regression │ ├── Lasso.scala │ └── LinearRegression.scala │ ├── collaborativeFiltering │ ├── MatrixFactorizationModel.scala │ └── MatrixFactorization.scala │ ├── classification │ ├── SVM.scala │ └── LogisticRegression.scala │ ├── dimReduction │ ├── SVD.scala │ ├── PCA.scala │ └── GLS_Matrix_Batch.scala │ ├── generalizedLinear │ └── Regularizer.scala │ ├── clustering │ └── KMeans.scala │ └── features │ └── Scaller.scala └── test └── scala └── linalg └── VectorsOpt.scala /.idea/.name: -------------------------------------------------------------------------------- 1 | libble-spark -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LIBBLE-Spark 2 | 3 | LIBBLE-Spark is a library for big data machine learning on Spark. 
Please visit http://www.libble.ml for more details. 4 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /data/testMF.txt: -------------------------------------------------------------------------------- 1 | 1,1,3.0 2 | 1,2,4.0 3 | 1,3,2.8 4 | 1,4,4.0 5 | 1,5,3.7 6 | 1,6,4.7 7 | 2,1,2.0 8 | 2,2,5.0 9 | 2,3,4.8 10 | 2,4,2.6 11 | 2,5,4.2 12 | 2,6,3.0 13 | 3,1,4.3 14 | 3,2,3.2 15 | 3,3,5.0 16 | 3,4,4.9 17 | 3,5,3.2 18 | 3,6,4.0 19 | 4,1,3.0 20 | 4,2,4.3 21 | 4,3,4.3 22 | 4,4,1.0 23 | 4,5,3.2 24 | 4,6,2.3 25 | 5,1,4.0 26 | 5,2,4.3 27 | 5,3,4.5 28 | 5,4,2.3 29 | 5,5,2.0 30 | 5,6,1.0 -------------------------------------------------------------------------------- /.idea/scala_compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 9 | 10 | -------------------------------------------------------------------------------- /src/main/scala/utils/WorkerStore.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by syh on 2016/12/9. 
3 | */ 4 | package libble.utils 5 | 6 | import scala.collection.mutable.{Map => mutableMap} 7 | 8 | class WorkerStore() { 9 | val store = mutableMap[String, Any]() 10 | 11 | def get[T](key: String): T = { 12 | store(key).asInstanceOf[T] 13 | } 14 | 15 | def put(key: String, value: Any) = { 16 | store += (key -> value) 17 | } 18 | } -------------------------------------------------------------------------------- /.idea/libraries/Maven__oro_oro_2_0_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__junit_junit_4_12.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_tukaani_xz_1_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__log4j_log4j_1_2_17.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__xmlenc_xmlenc_0_52.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_sf_py4j_py4j_0_10_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_ivy_ivy_2_4_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_jpountz_lz4_lz4_1_3_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_avro_avro_1_7_7.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_google_inject_guice_3_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_razorvine_pyrolite_4_9.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_sf_opencsv_opencsv_2_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_io_commons_io_2_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- 
/.idea/libraries/Maven__org_objenesis_objenesis_2_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__aopalliance_aopalliance_1_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_google_guava_guava_14_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_net_commons_net_2_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_netty_netty_3_8_0_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__javax_inject_javax_inject_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scala_lang_scalap_2_11_8.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_21.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_jpmml_pmml_model_1_2_15.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_ning_compress_lzf_1_0_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_twitter_chill_2_11_0_8_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_twitter_chill_java_0_8_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_lang_commons_lang_2_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 
11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_java_dev_jets3t_jets3t_0_7_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_avro_avro_ipc_1_7_7.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_codehaus_janino_janino_2_7_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_jpmml_pmml_schema_1_2_15.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_esotericsoftware_minlog_1_3_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_github_fommil_netlib_core_1_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_codec_commons_codec_1_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_slf4j_jul_to_slf4j_1_7_16.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_spire_math_spire_2_11_0_7_4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_github_rwl_jtransforms_2_4_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_antlr_antlr4_runtime_4_5_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scalanlp_breeze_2_11_0_11_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_slf4j_slf4j_log4j12_1_7_21.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scala_sbt_test_interface_1_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_spark_project_spark_unused_1_0_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_netty_netty_all_4_0_29_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_avro_avro_ipc_tests_1_7_7.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_slf4j_jcl_over_slf4j_1_7_16.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_clearspring_analytics_stream_2_7_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__com_novocode_junit_interface_0_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_auth_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_hdfs_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_zookeeper_zookeeper_3_4_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_hk2_hk2_api_2_4_0_b34.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_javassist_javassist_3_18_1_GA.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__org_mortbay_jetty_jetty_util_6_1_26.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__javax_ws_rs_javax_ws_rs_api_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_mesos_mesos_shaded_protobuf_0_21_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_json4s_json4s_ast_2_11_3_2_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scala_lang_scala_actors_2_11_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scalatest_scalatest_2_11_2_2_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scala_lang_scala_reflect_2_11_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_esotericsoftware_kryo_shaded_3_0_3.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_thoughtworks_paranamer_paranamer_2_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_avro_avro_mapred_hadoop2_1_7_7.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_json4s_json4s_core_2_11_3_2_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_dropwizard_metrics_metrics_jvm_3_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_client_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_common_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_sql_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_hk2_hk2_utils_2_4_0_b34.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scala_lang_scala_compiler_2_11_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_xerial_snappy_snappy_java_1_1_2_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_commons_commons_lang3_3_3_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__org_roaringbitmap_RoaringBitmap_0_5_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_google_protobuf_protobuf_java_2_5_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_univocity_univocity_parsers_2_1_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__javax_servlet_javax_servlet_api_3_1_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_curator_curator_client_2_4_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 
| 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_parquet_parquet_column_1_7_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_parquet_parquet_common_1_7_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_parquet_parquet_hadoop_1_7_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_core_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_tags_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_xbean_xbean_asm5_shaded_4_4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_api_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 
11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_mllib_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_hk2_hk2_locator_2_4_0_b34.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_spire_math_spire_macros_2_11_0_7_4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_curator_curator_recipes_2_4_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_parquet_parquet_jackson_1_7_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_json4s_json4s_jackson_2_11_3_2_11.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scalanlp_breeze_macros_2_11_0_11_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 
| 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_commons_commons_compress_1_4_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_parquet_parquet_encoding_1_7_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_graphx_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_sketch_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_unsafe_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 
| 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_codehaus_janino_commons_compiler_2_7_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scala_lang_modules_scala_xml_2_11_1_0_4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_core_2_6_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__javax_annotation_javax_annotation_api_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_curator_curator_framework_2_4_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_parquet_parquet_generator_1_7_0.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__net_sourceforge_f2j_arpack_combined_all_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_annotations_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_client_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_common_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_catalyst_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- 
/.idea/libraries/Maven__org_apache_spark_spark_launcher_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_13.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_jersey_core_jersey_client_2_22_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_jersey_core_jersey_common_2_22_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_jersey_core_jersey_server_2_22_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__javax_validation_validation_api_1_1_0_Final.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_streaming_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_13.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_hk2_osgi_resource_locator_1_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_6_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_mllib_local_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_hk2_external_javax_inject_2_4_0_b34.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_jersey_media_jersey_media_jaxb_2_22_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_annotations_2_6_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_network_common_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_server_common_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_spark_spark_network_shuffle_2_11_2_0_1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
-------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_parquet_parquet_format_2_3_0_incubating.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_app_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_jersey_bundles_repackaged_jersey_guava_2_22_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_core_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_common_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_module_jackson_module_paranamer_2_6_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_shuffle_2_2_0.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_hk2_external_aopalliance_repackaged_2_4_0_b34.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__com_fasterxml_jackson_module_jackson_module_scala_2_11_2_6_5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_scala_lang_modules_scala_parser_combinators_2_11_1_0_4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_jobclient_2_2_0.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_jersey_containers_jersey_container_servlet_2_22_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.idea/libraries/Maven__org_glassfish_jersey_containers_jersey_container_servlet_core_2_22_2.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 
/**
 * Unit tests for the vector operations exposed through the
 * `libble.linalg.implicits.vectorAdOps` enrichment.
 *
 * Created by Aplysia_x on 2016/11/7.
 *
 * NOTE(review): these tests share two mutable fixtures (`sparse`, `dense`)
 * across test cases. The expected values below only hold if the tests run
 * in declaration order AND `plusax` mutates `dense` in place — TODO confirm
 * both assumptions (ScalaTest runs suites in declaration order by default,
 * but this is a fragile coupling).
 */
class VectorsOpt extends FunSuite {
  // sparse represents (1.0, 0.0, 3.0): non-zeros at indices 0 and 2, size 3.
  val sparse = new SparseVector(Array(0, 2), Array(1.0, 3.0), 3)
  // dense represents (1.0, 2.0, 3.0).
  val dense = new DenseVector(Array(1.0, 2.0, 3.0))
  import libble.linalg.implicits.vectorAdOps

  // |1| + |3| = 4 for sparse; |1| + |2| + |3| = 6 for dense.
  test("norm1"){
    assert(sparse.norm1()==4)
    assert(dense.norm1()==6)
  }
  // sqrt(1 + 9) for sparse; sqrt(1 + 4 + 9) for dense.
  test("norm2"){
    assert(sparse.norm2()==math.sqrt(10))
    assert(dense.norm2()==math.sqrt(14))
  }


  // Dot product: 1*1 + 0*2 + 3*3 = 10.
  test("dot"){
    assert(sparse*dense==10)
  }

  // dense + 1.0 * sparse = (2, 2, 6), whose norm1 is 10.
  // NOTE(review): expectation 10 assumes plusax mutates `dense` in place —
  // the "scal" test below depends on that mutation having happened.
  test("plusax"){
    assert(dense.plusax(1.0,sparse).norm1==10)
  }

  // Assumes dense == (2, 2, 6) after the plusax test; scal(2.0) then yields
  // (4, 4, 12), whose norm1 is 20 — TODO confirm test ordering guarantee.
  test("scal"){
    assert(dense.scal(2.0).norm1()==20)
  }




}
/**
 * Here define the implicit method for converting the Vector to VectorsOp.
 */
package object implicits {
  /**
   * Implicitly enriches a [[Vector]] with the operations defined on
   * `VectorsOp` (norms, dot product, in-place updates, ...).
   *
   * The return type is now explicit: implicit definitions without an
   * explicit result type are a long-standing Scala pitfall (fragile
   * inference, compiler warnings in 2.13+).
   *
   * @param vec the vector to enrich
   * @return a `VectorsOp` wrapper around `vec`
   */
  implicit def vectorAdOps(vec: Vector): VectorsOp = new VectorsOp(vec)
}
/**
 * This class is used to denote one term of the training or testing data, which consists of
 * one label and one Vector.
 *
 * (`val` dropped from the parameters: case-class parameters are public
 * vals by default, so the explicit modifier was redundant.)
 *
 * @param label    the target value of this example (class label or regression response)
 * @param features the feature vector of this example
 */
case class Instance(label: Double, features: Vector) {
  /** Renders as "(label, features)". */
  override def toString: String = s"($label, $features)"
}
object XORShiftRandom {
  /**
   * Hash seeds to have 0/1 bits throughout.
   *
   * Fix: the previous code allocated `java.lang.Long.SIZE` (= 64, the BIT
   * count) bytes, producing a 64-byte buffer of which 56 bytes were always
   * zero. `java.lang.Long.BYTES` (= 8) is the intended size. Note this
   * changes the hashed seed (and therefore the generated stream) for any
   * given input seed, since fewer bytes are now fed to the hash.
   *
   * NOTE(review): `MurmurHash3.bytesHash` returns an Int, so the resulting
   * Long seed carries only 32 bits of entropy (the Int is widened).
   */
  private def hashSeed(seed: Long): Long = {
    val bytes = ByteBuffer.allocate(java.lang.Long.BYTES).putLong(seed).array()
    MurmurHash3.bytesHash(bytes)
  }
}
/**
 * Exercises `loadLIBBLEFile` and `saveAsLIBBLEFile`: reads a data set in
 * LIBBLE format, prints how many examples it contains, and writes it back
 * out again.
 */
object LoadFile {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("myTest")
    val spark = new SparkContext(sparkConf)

    // Brings the SparkContext -> LibContext enrichment into scope.
    import libble.context.implicits._
    val examples = spark.loadLIBBLEFile("sparse.data")
    println(examples.count())
    examples.saveAsLIBBLEFile("this.data")
  }
}
/**
 * Model produced by matrix factorization: a rating is predicted as the dot
 * product of a user's latent-factor vector and an item's latent-factor vector.
 *
 * @param rank        number of latent factors per user/item
 * @param userFactors RDD of (userIndex, latent factor vector)
 * @param itemFactors RDD of (itemIndex, latent factor vector)
 */
class MatrixFactorizationModel (rank: Int,
                                userFactors: RDD[(Int, Vector)],
                                itemFactors: RDD[(Int, Vector)]) extends Serializable{
  /**
   * Predicts the rating for a single (user, item) pair.
   *
   * NOTE(review): `lookup(...).head` throws NoSuchElementException when the
   * index is absent from the factors RDD, and each call triggers a
   * distributed lookup — expensive if called per pair in a loop.
   */
  def predict (userIndex: Int, itemIndex: Int) : Double = {
    val uh = userFactors.lookup(userIndex).head
    val vj = itemFactors.lookup(itemIndex).head
    uh * vj
  }
  /**
   * Predicts ratings for a batch of (userIndex, itemIndex) pairs.
   *
   * Joins against the smaller side first: if there are more distinct users
   * than items, the pairs are joined with itemFactors before userFactors,
   * and vice versa, to reduce shuffle size.
   *
   * @param indices RDD of (userIndex, itemIndex) pairs to score
   * @return one Rating per input pair that has factors on both sides
   *         (pairs with unknown user or item are silently dropped by the joins)
   */
  def predict (indices: RDD[(Int, Int)]): RDD[Rating] = {
    // Counting distinct keys on both sides costs two passes over `indices`,
    // but lets us pick the cheaper join order below.
    val numUsers = indices.keys.distinct().count()
    val numItems = indices.values.distinct().count()
    if (numUsers > numItems){
      // More users than items: key the pairs by item first.
      itemFactors.join(indices.map(_.swap)).map{
        case (item, (item_factors, user)) => (user, (item, item_factors))
      }
        .join(userFactors).map{
        case (user, ((item, item_factors), user_factors)) =>
          new Rating(item_factors * user_factors, user, item)
      }
    }
    else{
      // More items than users (or equal): key the pairs by user first.
      userFactors.join(indices).map{
        case (user, (user_factors, item)) => (item, (user, user_factors))
      }
        .join(itemFactors).map{
        case (item, ((user, user_factors), item_factors)) =>
          new Rating(item_factors * user_factors, user, item)
      }
    }
  }
}
-------------------------------------------------------------------------------- /src/main/scala/classification/SVM.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.classification 16 | 17 | import libble.generalizedLinear.{HingeLoss, L2Updater, LinearScope} 18 | 19 | /** 20 | * This class is the model of SVM with default regularization L2Reg. 21 | * 22 | * @param stepSize 23 | * @param regParam 24 | * @param factor 25 | * @param iters 26 | * @param partsNum 27 | */ 28 | class SVM(stepSize: Double, 29 | regParam: Double, 30 | factor: Double, 31 | iters: Int, 32 | partsNum: Int) extends LinearScope(stepSize, regParam, factor, iters, partsNum) { 33 | def this() = this(1.0, 0.0001, 0.0001, 5, -1) 34 | 35 | setLossFunc(new HingeLoss) 36 | setUpdater(new L2Updater) 37 | 38 | /** 39 | * Default threshold is 0.0. 40 | */ 41 | setThreshold(0.0) 42 | 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/examples/testScaller.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 
4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * You may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | package libble.examples 16 | 17 | import libble.features.Scaller 18 | import org.apache.spark.{SparkConf, SparkContext} 19 | 20 | /** 21 | * This is an example of using Scaller. 22 | */ 23 | object testScaller { 24 | def main(args: Array[String]) { 25 | 26 | val conf = new SparkConf() 27 | .setAppName("myTest") 28 | val sc = new SparkContext(conf) 29 | 30 | import libble.context.implicits.sc2LibContext 31 | val training = sc.loadLIBBLEFile("sparse.data") 32 | 33 | val scaller = new Scaller(true, true) 34 | val features = training.map(_.features) 35 | scaller.computeFactor(features) 36 | 37 | println("center:" + scaller.getCenter.get) 38 | println("std:" + scaller.getStd.get) 39 | val result = scaller.transform(features).collect() 40 | println(result.mkString(", ")) 41 | 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/scala/regression/LinearRegression.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.regression 16 | 17 | import libble.generalizedLinear.{ L2Updater, LeastSquareLoss, LinearScope} 18 | 19 | /** 20 | * This is the model of LinearRegression with default regularization L1Reg. 21 | * 22 | * @param stepSize 23 | * @param regParam 24 | * @param factor 25 | * @param iters 26 | * @param partsNum 27 | */ 28 | class LinearRegression(stepSize: Double, 29 | regParam: Double, 30 | factor: Double, 31 | iters: Int, 32 | partsNum: Int) extends LinearScope(stepSize, regParam, factor, iters, partsNum) { 33 | def this() = this(1.0, 0.0001, 0.0001, 5, -1) 34 | 35 | setLossFunc(new LeastSquareLoss) 36 | setUpdater(new L2Updater()) 37 | 38 | /** 39 | * Set the output to be the predict value. 40 | */ 41 | clearThreshold 42 | 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/main/scala/classification/LogisticRegression.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.classification 16 | 17 | import libble.generalizedLinear.{L2Updater, LinearScope, LogisticLoss} 18 | 19 | /** 20 | * This class is the model of LogisticRegression with default regularization L2Reg. 21 | * 22 | * @param stepSize 23 | * @param regParam 24 | * @param factor 25 | * @param iters 26 | * @param partsNum 27 | */ 28 | class LogisticRegression(stepSize: Double, 29 | regParam: Double, 30 | factor: Double, 31 | iters: Int, 32 | partsNum: Int) extends LinearScope(stepSize, regParam, factor, iters, partsNum) { 33 | def this() = this(1.0, 0.0001, 0.0001, 5, -1) 34 | 35 | setLossFunc(new LogisticLoss()) 36 | setUpdater(new L2Updater()) 37 | 38 | 39 | /** 40 | * Default threshold is 0.5. 41 | */ 42 | setThreshold(0.5) 43 | 44 | /** 45 | * Set the classNum 46 | * 47 | * @param classNum 48 | * @return this 49 | */ 50 | override def setClassNum(classNum: Int): LogisticRegression.this.type ={ 51 | super.setClassNum(classNum) 52 | setLossFunc(new LogisticLoss(classNum)) 53 | 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/examples/testKMeans.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * You may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package libble.examples 17 | 18 | import libble.clustering.KMeans 19 | import org.apache.log4j.{Level, Logger} 20 | import org.apache.spark.{SparkConf, SparkContext} 21 | 22 | import scala.collection.mutable 23 | 24 | /** 25 | * Created by Aplysia_x on 2016/12/9. 26 | */ 27 | object testKMeans { 28 | def main(args: Array[String]) { 29 | 30 | if (args.length < 1) { 31 | System.err.println("Usage: ~ path:String --k=Int --maxIters=Int --stopBound=Double") 32 | System.exit(1) 33 | } 34 | // System.setProperty("hadoop.home.dir", "D:\\Program Files\\hadoop-2.6.0") 35 | 36 | val optionsList = args.drop(1).map { arg => 37 | arg.dropWhile(_ == '-').split('=') match { 38 | case Array(opt, v) => (opt -> v) 39 | case _ => throw new IllegalArgumentException("Invalid argument: " + arg) 40 | } 41 | } 42 | val options = mutable.Map(optionsList: _*) 43 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN) 44 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF) 45 | 46 | val conf = new SparkConf() 47 | .setAppName("My Test Kmeans") 48 | val sc = new SparkContext(conf) 49 | 50 | val k = options.remove("k").map(_.toInt).getOrElse(10) 51 | val maxIters = options.remove("maxIters").map(_.toInt).getOrElse(10) 52 | val stopBound = options.remove("stopBound").map(_.toDouble).getOrElse(0.0001) 53 | 54 | import libble.context.implicits.sc2LibContext 55 | val training = sc.loadLIBBLEFile(args(0)) 56 | val m = new KMeans(k, maxIters, stopBound) 57 | val data = training.map(e => (e.label, e.features)) 58 | m.train(data) 59 | 
} 60 | } -------------------------------------------------------------------------------- /src/main/scala/dimReduction/SVD.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * You may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package libble.dimReduction 17 | 18 | import java.util.Calendar 19 | 20 | import libble.linalg.Vector 21 | import libble.linalg.implicits._ 22 | import org.apache.spark.rdd.RDD 23 | 24 | import scala.collection.mutable.ArrayBuffer 25 | 26 | 27 | /** 28 | * This is the model of SVD 29 | * 30 | * @param K 31 | * @param bound 32 | * @param stepSize 33 | * @param iteration 34 | * @param parts 35 | * @param batchSize 36 | */ 37 | 38 | class SVD(var K: Int, 39 | var bound: Double, 40 | var stepSize: Double, 41 | var iteration: Int, 42 | var parts: Int, 43 | var batchSize: Int) extends Serializable { 44 | var eigenvalues = new ArrayBuffer[Double]() 45 | var eigenvectors = new ArrayBuffer[Vector]() 46 | 47 | 48 | /** 49 | * 50 | * This method generates singular values matrix and right singular vectors. 
51 | * 52 | * @param training 53 | */ 54 | def train(training: RDD[Vector]): (Array[Double], Array[Vector]) = { 55 | val st = Calendar.getInstance().getTimeInMillis 56 | val m = new GLS_Matrix_Batch(stepSize, 0.0, 0.0, iteration, parts, batchSize, K) 57 | m.setStopBound(bound) 58 | val model = m.train(training) 59 | 60 | /** 61 | * 62 | * v is the right singular matrix 63 | * Singular values matrix which is square root of eigenvalues matrix. 64 | * 65 | */ 66 | for (k <- 0 to K - 1) { 67 | val v = model._1(k) 68 | val lambda = training.map(x => Math.pow(x * v, 2)).reduce(_ + _) 69 | eigenvalues.append(math.sqrt(lambda)) 70 | eigenvectors.append(v) 71 | } 72 | 73 | println(s"time to calculate the top ${K} eigen is: " + (Calendar.getInstance().getTimeInMillis - st)) 74 | (eigenvalues.toArray, eigenvectors.toArray) 75 | 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /src/main/scala/examples/testLR.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 4 | * All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * You may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | * 16 | */ 17 | package libble.examples 18 | 19 | import libble.classification.LogisticRegression 20 | import org.apache.log4j.{Level, Logger} 21 | import org.apache.spark.{SparkConf, SparkContext} 22 | 23 | import scala.collection.mutable 24 | 25 | /** * 26 | * Here is the example of using LogisticRegression. 27 | */ 28 | object testLR { 29 | def main(args: Array[String]) { 30 | 31 | if (args.length < 1) { 32 | System.err.println("Usage: ~ path:String --elasticF=Double --numIters=Int --stepSize=Double --regParam=Double --nuPart=Int --numClasses=Int") 33 | System.exit(1) 34 | } 35 | 36 | val optionsList = args.drop(1).map { arg => 37 | arg.dropWhile(_ == '-').split('=') match { 38 | case Array(opt, v) => (opt -> v) 39 | case _ => throw new IllegalArgumentException("Invalid argument: " + arg) 40 | } 41 | } 42 | val options = mutable.Map(optionsList: _*) 43 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN) 44 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF) 45 | 46 | val conf = new SparkConf() 47 | .setAppName("myTest") 48 | val sc = new SparkContext(conf) 49 | 50 | val stepSize = options.remove("stepSize").map(_.toDouble).getOrElse(1.0) 51 | val regParam = options.remove("regParam").map(_.toDouble).getOrElse(0.00001) 52 | val numIter = options.remove("numIters").map(_.toInt).getOrElse(5) 53 | val elasticF = options.remove("elasticF").map(_.toDouble).getOrElse(0.00001) 54 | val numPart = options.remove("numPart").map(_.toInt).getOrElse(20) 55 | val numClasses = options.remove("numClasses").map(_.toInt).getOrElse(2) 56 | import libble.context.implicits.sc2LibContext 57 | val training = sc.loadLIBBLEFile(args(0), numPart) 58 | val m = new LogisticRegression(stepSize, regParam, elasticF, numIter, numPart) 59 | m.train(training) 60 | 61 | 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/scala/examples/testSVD.scala: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.examples 16 | 17 | import libble.dimReduction.SVD 18 | import org.apache.log4j.{Level, Logger} 19 | import org.apache.spark.{SparkConf, SparkContext} 20 | 21 | import scala.collection.mutable 22 | 23 | /** 24 | * This is an example of using SVD. 
25 | */ 26 | object testSVD { 27 | def main(args: Array[String]): Unit = { 28 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN) 29 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF) 30 | System.setProperty("spark.ui.port", "4042") 31 | System.setProperty("spark.akka.frameSize", "100") 32 | 33 | val conf = new SparkConf().setAppName("testSVD") 34 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 35 | conf.set("spark.kryoserializer.buffer.max", "2000m") 36 | val sc = new SparkContext(conf) 37 | 38 | if (args.length < 5) { 39 | System.err.println("Usage: ~ path:String --elasticF=Double --numIters=Int --stepSize=Double --regParam=Double --nuPart=Int --numClasses=Int") 40 | System.exit(1) 41 | } 42 | val optionsList = args.drop(1).map { arg => 43 | arg.dropWhile(_ == '-').split('=') match { 44 | case Array(opt, v) => (opt -> v) 45 | case _ => throw new IllegalArgumentException("Invalid argument: " + arg) 46 | } 47 | } 48 | val options = mutable.Map(optionsList: _*) 49 | 50 | val stepSize = options.remove("stepSize").map(_.toDouble).getOrElse(0.1) 51 | val numIters = options.remove("numIters").map(_.toInt).getOrElse(10) 52 | val numPart = options.remove("numPart").map(_.toInt).getOrElse(2) 53 | val K = options.remove("k").map(_.toInt).getOrElse(1) 54 | val bound = options.remove("bound").map(_.toDouble).getOrElse(1e-6) 55 | val batchSize = options.remove("batchSize").map(_.toInt).getOrElse(100) 56 | 57 | /* 58 | * Scope SVD 59 | */ 60 | import libble.context.implicits._ 61 | val training = sc.loadLIBBLEFile(args(0)).map(_.features) 62 | 63 | val mysvd = new SVD(K, bound, stepSize, numIters, numPart, batchSize) //matrix, altogether update eigens 64 | 65 | val SVDModel = mysvd.train(training) 66 | 67 | val sigma = SVDModel._1 68 | val v = SVDModel._2 69 | 70 | sigma.foreach(x=>print(x+",")) 71 | v.foreach(x=>println(x)) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- 
/src/main/scala/examples/testPCA.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.examples 16 | 17 | import libble.dimReduction.PCA 18 | import org.apache.log4j.{Level, Logger} 19 | import org.apache.spark.{SparkContext, SparkConf} 20 | 21 | import scala.collection.mutable 22 | 23 | /** 24 | * This is an example of using PCA. 
25 | */ 26 | object testPCA { 27 | def main(args: Array[String]): Unit = { 28 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN) 29 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF) 30 | System.setProperty("spark.ui.port", "4042") 31 | System.setProperty("spark.akka.frameSize", "100") 32 | 33 | val conf = new SparkConf().setAppName("testPCA") 34 | conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 35 | conf.set("spark.kryoserializer.buffer.max", "2000m") 36 | val sc = new SparkContext(conf) 37 | 38 | if (args.length < 5) { 39 | System.err.println("Usage: ~ path:String --elasticF=Double --numIters=Int --stepSize=Double --regParam=Double --nuPart=Int --numClasses=Int") 40 | System.exit(1) 41 | } 42 | val optionsList = args.drop(1).map { arg => 43 | arg.dropWhile(_ == '-').split('=') match { 44 | case Array(opt, v) => (opt -> v) 45 | case _ => throw new IllegalArgumentException("Invalid argument: " + arg) 46 | } 47 | } 48 | val options = mutable.Map(optionsList: _*) 49 | 50 | val stepSize = options.remove("stepSize").map(_.toDouble).getOrElse(0.1) 51 | val numIters = options.remove("numIters").map(_.toInt).getOrElse(10) 52 | val numPart = options.remove("numPart").map(_.toInt).getOrElse(2) 53 | val K = options.remove("k").map(_.toInt).getOrElse(1) 54 | val bound = options.remove("bound").map(_.toDouble).getOrElse(1e-6) 55 | val batchSize = options.remove("batchSize").map(_.toInt).getOrElse(100) 56 | 57 | /* 58 | * Scope PCA 59 | */ 60 | import libble.context.implicits._ 61 | val training = sc.loadLIBBLEFile(args(0)).map(_.features) 62 | 63 | val mypca = new PCA(K, bound, stepSize, numIters, numPart, batchSize) //matrix, altogether update eigens 64 | val PCAModel = mypca.train(training) 65 | 66 | val pc = PCAModel._2 67 | pc.foreach(x => println(x)) 68 | val projected = mypca.transform(training, pc) 69 | projected.collect().foreach(x => println(x)) 70 | 71 | } 72 | } 73 | 
-------------------------------------------------------------------------------- /src/main/scala/examples/testCF.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 4 | * All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * You may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | /** 19 | * We licence this file to you under the Apache Licence 2.0; you could get a copy 20 | * of the licence from http://www.apache.org/licenses/LICENSE-2.0. 21 | */ 22 | package libble.examples 23 | 24 | import libble.collaborativeFiltering.{ MatrixFactorizationByScope, MatrixFactorization, Rating} 25 | import org.apache.log4j.{Level, Logger} 26 | import org.apache.spark.{SparkConf, SparkContext} 27 | 28 | import scala.collection.mutable 29 | 30 | 31 | /*** 32 | * Here is the example of using Matrix Factorization. 
33 | */ 34 | object testCF { 35 | def main(args: Array[String]) { 36 | val optionsList = args.map { arg => 37 | arg.dropWhile(_ == '-').split('=') match { 38 | case Array(opt, v) => (opt -> v) 39 | case _ => throw new IllegalArgumentException("Invalid argument: " + arg) 40 | } 41 | } 42 | val options = mutable.Map(optionsList: _*) 43 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN) 44 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF) 45 | 46 | val conf = new SparkConf() 47 | .setAppName("testMF") 48 | val sc = new SparkContext(conf) 49 | 50 | val trainsetPath = options.remove("trainset").map(_.toString).getOrElse("data\\testMF.txt") 51 | val stepsize = options.remove("stepsize").map(_.toDouble).getOrElse(0.01) 52 | val regParam_u = options.remove("regParam_u").map(_.toDouble).getOrElse(0.05) 53 | val regParam_v = options.remove("regParam_u").map(_.toDouble).getOrElse(0.05) 54 | val numIters = options.remove("numIters").map(_.toInt).getOrElse(50) 55 | val numParts = options.remove("numParts").map(_.toInt).getOrElse(16) 56 | val rank = options.remove("rank").map(_.toInt).getOrElse(40) 57 | val testsetPath = options.remove("testset").map(_.toString) 58 | val stepsize2 = options.remove("stepsize2").map(_.toDouble).getOrElse(0.1) 59 | val ifPrintLoss = options.remove("ifPrintLoss").map(_.toInt).getOrElse(0) 60 | 61 | val trainSet = sc.textFile(trainsetPath, numParts) 62 | .map(_.split(',') match { case Array(user, item, rate) => 63 | Rating(rate.toDouble, user.toInt, item.toInt) 64 | }) 65 | 66 | val model = new MatrixFactorization() 67 | .train(trainSet, 68 | numIters, 69 | numParts, 70 | rank, 71 | regParam_u, 72 | regParam_v, 73 | stepsize, 74 | ifPrintLoss) 75 | 76 | if(testsetPath.isDefined) { 77 | val testSet = sc.textFile(testsetPath.get, numParts) 78 | .map(_.split(',') match { case Array(user, item, rate) => 79 | Rating(rate.toDouble, user.toInt, item.toInt) 80 | }) 81 | 82 | val result = model.predict(testSet.map(r => (r.index_x, 
r.index_y))) 83 | val joinRDD = result.map(r => ((r.index_x, r.index_y), r.rating)) 84 | .join(testSet.map(r => ((r.index_x, r.index_y), r.rating))) 85 | 86 | // println(s"size of testSet: ${testSet.count()}") 87 | // println(s"size of joinRDD: ${joinRDD.count()}") 88 | val rmse = joinRDD.values 89 | .map(i => math.pow(i._1 - i._2, 2)) 90 | .mean() 91 | println(s"rmse of test set: ${math.sqrt(rmse)}") 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/scala/generalizedLinear/Regularizer.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * We licence this file to you under the Apache Licence 2.0; you could get a copy 3 | * of the licence from http://www.apache.org/licenses/LICENSE-2.0. 4 | */ 5 | package libble.generalizedLinear 6 | 7 | import libble.linalg.Vector 8 | import libble.linalg.implicits._ 9 | 10 | /** 11 | * 12 | */ 13 | abstract class Updater extends Serializable { 14 | /** 15 | * In this method, we update the weight with weightnew= weightOld+stepSize*(gradient+regParam* delte(regularizer)). 16 | * Where delta(regularizer) is the gradient of regularizer. 17 | * 18 | * @param weights 19 | * @param gradient 20 | * @param stepSize 21 | * @param regParam 22 | * @return weightNew 23 | */ 24 | def update(weights: Vector, gradient: Vector, stepSize: Double, regParam: Double): Unit 25 | 26 | /** 27 | * In this method, we give the cost of the regularizer 28 | * 29 | * @param weight 30 | * @param regParam 31 | * @return regCost 32 | */ 33 | def getRegVal(weight: Vector, regParam: Double): Double 34 | 35 | } 36 | 37 | /** 38 | * 39 | */ 40 | class simpleUpdater extends Updater { 41 | /** 42 | * In this method, we update the weight with weightnew= weightOld+stepSize*(gradient+regParam* delte(regularizer)). 43 | * Where delta(regularizer) is the gradient of regularizer. 
44 | * 45 | * @param weights 46 | * @param gradient 47 | * @param stepSize 48 | * @param regParam 49 | * @return weightNew 50 | */ 51 | override def update(weights: Vector, gradient: Vector, stepSize: Double, regParam: Double): Unit = { 52 | weights.plusax(-stepSize, gradient) 53 | } 54 | 55 | /** 56 | * In this method, we give the cost of the regularizer 57 | * 58 | * @param weight 59 | * @param regParam 60 | * @return regCost 61 | */ 62 | override def getRegVal(weight: Vector, regParam: Double): Double = { 63 | 0.0 64 | } 65 | } 66 | 67 | /** 68 | * 69 | */ 70 | class L1Updater extends Updater{ 71 | /** 72 | * In this method, we update the weight with weightnew= weightOld+stepSize*(gradient+regParam* delte(regularizer)). 73 | * Where delta(regularizer) is the gradient of regularizer. 74 | * 75 | * @param weights 76 | * @param gradient 77 | * @param stepSize 78 | * @param regParam 79 | * @return weightNew 80 | */ 81 | override def update(weights: Vector, gradient: Vector, stepSize: Double, regParam: Double): Unit = { 82 | weights.plusax(-stepSize, gradient) 83 | val reg_step = regParam * stepSize 84 | val weightsValues = weights.toArray 85 | var offset = 0 86 | while (offset < weights.size) { 87 | weightsValues(offset) = math.signum(weightsValues(offset)) * math.max(0.0, math.abs(weightsValues(offset) - reg_step)) 88 | offset += 1 89 | } 90 | } 91 | 92 | /** 93 | * In this method, we give the cost of the regularizer 94 | * 95 | * @param weight 96 | * @param regParam 97 | * @return regCost 98 | */ 99 | override def getRegVal(weight: Vector, regParam: Double): Double = { 100 | weight.norm1 * regParam 101 | } 102 | } 103 | 104 | /** 105 | * 106 | */ 107 | class L2Updater extends Updater { 108 | /** 109 | * In this method, we update the weight with weightnew= weightOld+stepSize*(gradient+regParam* delte(regularizer)). 110 | * Where delta(regularizer) is the gradient of regularizer. 
111 | * 112 | * @param weights 113 | * @param gradient 114 | * @param stepSize 115 | * @param regParam 116 | * @return weightNew 117 | */ 118 | override def update(weights: Vector, gradient: Vector, stepSize: Double, regParam: Double): Unit = { 119 | weights *= (1 - stepSize * regParam) 120 | weights.plusax(-stepSize, gradient) 121 | } 122 | 123 | /** 124 | * In this method, we give the cost of the regularizer 125 | * 126 | * @param weight 127 | * @param regParam 128 | * @return regCost 129 | */ 130 | override def getRegVal(weight: Vector, regParam: Double): Double = { 131 | val norm = weight.norm2 132 | 0.5 * regParam * norm * norm 133 | } 134 | } 135 | 136 | -------------------------------------------------------------------------------- /src/main/scala/dimReduction/PCA.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.dimReduction 16 | 17 | import java.util.Calendar 18 | 19 | import libble.linalg.implicits._ 20 | import libble.linalg.{DenseVector, Vector} 21 | import org.apache.spark.rdd.RDD 22 | 23 | import scala.collection.mutable.ArrayBuffer 24 | 25 | /** 26 | * 27 | * This class is the model of PCA. 
28 | * 29 | * @param K 30 | * @param bound 31 | * @param stepSize 32 | * @param iteration 33 | * @param parts 34 | * @param batchSize 35 | */ 36 | 37 | class PCA(var K: Int, 38 | var bound: Double, 39 | var stepSize: Double, 40 | var iteration: Int, 41 | var parts: Int, 42 | var batchSize: Int) extends Serializable { 43 | require(K >= 1, s"K is the number of principal components, it should be that K >= 1 but was given $K") 44 | 45 | var eigenvalues = new ArrayBuffer[Double]() 46 | var eigenvectors = new ArrayBuffer[Vector]() 47 | 48 | 49 | /** 50 | * 51 | * This method generates the K principle components and their relating eigenvalues. 52 | * 53 | * @param training 54 | */ 55 | def train(training: RDD[Vector]): (Array[Double], Array[Vector]) = { 56 | 57 | require(K <= training.first().size, 58 | s"data dimension size is ${training.first().size}, it must be greater than K=$K") 59 | 60 | val centerData = centralize(training) 61 | 62 | val st = Calendar.getInstance().getTimeInMillis 63 | val m = new GLS_Matrix_Batch(stepSize, 0.0, 0.0, iteration, parts, batchSize, K) 64 | m.setStopBound(bound) 65 | val model = m.train(centerData) 66 | 67 | /** 68 | * v is the kth principle components. 69 | * lambda is the kth largest eigenvalues corresponding to v. 70 | */ 71 | for (k <- 0 to K - 1) { 72 | val v = model._1(k) 73 | val lambda = (1.0 / (centerData.count() - 1)) * centerData.map(x => Math.pow(x * v, 2)).reduce(_ + _) 74 | eigenvalues.append(lambda) 75 | eigenvectors.append(v) 76 | } 77 | 78 | println(s"time to calculate the top ${K} eigen is: " + (Calendar.getInstance().getTimeInMillis - st)) 79 | (eigenvalues.toArray, eigenvectors.toArray) 80 | 81 | } 82 | 83 | /** 84 | * 85 | * This method centralizes raw data which is the first step of PCA. 
86 | * 87 | * @param data 88 | * 89 | */ 90 | def centralize(data: RDD[Vector]): RDD[Vector] = { 91 | val count = data.count() 92 | val numF = data.first().size 93 | val average = data.treeAggregate(new DenseVector(numF))( 94 | seqOp = (c, v) => { 95 | c += v 96 | c 97 | }, combOp = (c1, c2) => { 98 | c2 += c1 99 | c2 100 | } 101 | ) 102 | average /= count 103 | val aver = data.context.broadcast(average) 104 | 105 | val panedData = data.map { e => 106 | val newFeatures = new DenseVector(e.toArray) 107 | newFeatures -= aver.value 108 | newFeatures.vector 109 | 110 | } 111 | panedData 112 | } 113 | 114 | 115 | /** 116 | * 117 | * This method projects raw data to new feature space using principle components. 118 | * 119 | * @param rawData 120 | * @param pc 121 | * 122 | */ 123 | def transform(rawData: RDD[Vector], pc: Array[Vector]): RDD[Vector] = { 124 | val projected = rawData.map { ins => 125 | val arr = new ArrayBuffer[Double]() 126 | for (k <- pc.indices) { 127 | arr.append(ins * pc(k)) 128 | } 129 | new DenseVector(arr.toArray).vector 130 | 131 | } 132 | projected 133 | } 134 | 135 | } 136 | -------------------------------------------------------------------------------- /src/main/scala/context/implicits.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * You may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | package libble.context 16 | 17 | import libble.linalg.{DenseVector, SparseVector} 18 | import org.apache.spark.SparkContext 19 | import org.apache.spark.rdd.RDD 20 | 21 | import scala.language.implicitConversions 22 | /** 23 | * Here we define the implicit convert function. 24 | */ 25 | object implicits { 26 | implicit def sc2LibContext(sc: SparkContext) = new LibContext(sc) 27 | 28 | implicit def RDD2LIBBLERDD(data: RDD[Instance]) = new LIBBLERDD(data) 29 | } 30 | 31 | /** 32 | * This class includes the methods of loading LIBBLEFILE from the file system. 33 | * 34 | * @param sc 35 | */ 36 | class LibContext(val sc: SparkContext) { 37 | /** 38 | * Load LibSVM file from the File System with default parallelization. 39 | * 40 | * @param path 41 | * @return RDD[Instance] 42 | * @deprecated replaced by function loadLibSVMFile 43 | */ 44 | def loadLibSVMFile(path: String): RDD[Instance] = { 45 | loadLibSVMFile(path, -1) 46 | } 47 | 48 | /** 49 | * Load LibSVM file from the File System with given parallelization. 
50 | * 51 | * @param path 52 | * @param partsNum 53 | * @return RDD[Instance] 54 | * @deprecated replaced by function loadLibSVMFile 55 | */ 56 | def loadLibSVMFile(path: String, partsNum: Int): RDD[Instance] = { 57 | val lines = { 58 | if (partsNum > 0) sc.textFile(path, partsNum) else sc.textFile(path) 59 | }.map(_.trim) 60 | .filter(line => !(line.isEmpty || line.startsWith("#"))) 61 | val terms = lines.filter(_.split(" ").length != 1).map { line => 62 | val items = line.split(" ") 63 | val label = items.head.toDouble 64 | val term = items.tail.filter(_.nonEmpty).map { item => 65 | val temp = item.split(":") 66 | (temp.head.toInt - 1, temp.last.toDouble) 67 | }.unzip 68 | (label, term._1, term._2) 69 | }.cache() 70 | val d = terms.map(_._2.lastOption.getOrElse(0)) 71 | .reduce(math.max) + 1 72 | terms.map { term => 73 | new Instance(term._1, new SparseVector(term._2.toArray, term._3.toArray, d)) 74 | 75 | } 76 | } 77 | 78 | /** 79 | * Load LIBBLE file from File System with default parallelization 80 | * Compatible with LibSVM file. 81 | * 82 | * @param path 83 | * @return RDD[Instance] 84 | */ 85 | def loadLIBBLEFile(path: String): RDD[Instance] = { 86 | loadLIBBLEFile(path, -1) 87 | } 88 | 89 | /** 90 | * Load LIBBLE file from File System with given parallelization. 91 | * Compatible with LibSVM file. 
92 | * 93 | * @param path 94 | * @param partsNum 95 | * @return RDD[Instance] 96 | */ 97 | def loadLIBBLEFile(path: String, partsNum: Int): RDD[Instance] = { 98 | val lines = { 99 | if (partsNum > 0) sc.textFile(path, partsNum) else sc.textFile(path) 100 | }.map(_.trim) 101 | .filter(line => !(line.isEmpty || line.startsWith("#"))) 102 | lines.first().contains(":") match { 103 | case true => { 104 | val terms = lines.map { line => 105 | val items = line.split(' ') 106 | val label = items.head.toDouble 107 | val term = items.tail.filter(_.nonEmpty).map { item => 108 | val temp = item.split(':') 109 | (temp.head.toInt - 1, temp.last.toDouble) 110 | }.unzip 111 | (label, term._1, term._2) 112 | }.cache() 113 | 114 | val d = terms.map(_._2.lastOption.getOrElse(0)).reduce(math.max) + 1 115 | terms.map { term => 116 | new Instance(term._1, new SparseVector(term._2.toArray, term._3.toArray, d)) 117 | } 118 | } 119 | case false => { 120 | lines.map { line => 121 | val items = line.split(' ') 122 | new Instance(items.head.toDouble, new DenseVector(items.drop(1).map(_.toDouble))) 123 | } 124 | } 125 | } 126 | 127 | } 128 | 129 | 130 | } 131 | 132 | 133 | /** 134 | * With this class, we add save-data methods to the RDD[Instance]. 135 | * 136 | * @param data 137 | */ 138 | class LIBBLERDD(val data: RDD[Instance]) { 139 | /** 140 | * Save data to File System in LibSVM format. 141 | * 142 | * @param path 143 | * @deprecated 144 | */ 145 | def saveAsLibSVMFile(path: String): Unit = { 146 | data.map { term => 147 | val line = new StringBuilder(term.label.toString) 148 | term.features.foreachActive { (i, v) => 149 | line ++= s" ${i + 1}:$v" 150 | } 151 | line.mkString 152 | }.saveAsTextFile(path) 153 | } 154 | 155 | /** 156 | * Save data to File System in LIBBLE format. 
157 | * 158 | * @param path 159 | */ 160 | def saveAsLIBBLEFile(path: String): Unit = { 161 | val first = data.first() 162 | first.features match { 163 | case sv: SparseVector => { 164 | data.map { term => 165 | val line = new StringBuilder(term.label.toString) 166 | term.features.foreachActive { (i, v) => 167 | line ++= s" ${i + 1}:$v" 168 | } 169 | line.mkString 170 | }.saveAsTextFile(path) 171 | } 172 | case dv: DenseVector => { 173 | data.map { term => 174 | (term.label +: term.features.toArray).mkString(" ") 175 | }.saveAsTextFile(path) 176 | } 177 | } 178 | } 179 | 180 | 181 | } 182 | 183 | 184 | -------------------------------------------------------------------------------- /src/main/scala/clustering/KMeans.scala: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 4 | * All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * You may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | package libble.clustering 18 | 19 | import java.util 20 | 21 | import libble.linalg.implicits.vectorAdOps 22 | import libble.linalg.{DenseVector, Vector} 23 | import org.apache.spark.rdd.RDD 24 | 25 | /** 26 | * KMeans Algorithm. 
27 | */ 28 | class KMeans( 29 | private var k: Int, 30 | private var maxIters: Int, 31 | private var stopBound: Double = 0) extends Serializable { 32 | 33 | @transient 34 | private var initCenters: Option[Array[(Vector, Double)]] = None 35 | 36 | def this(k: Int, stopBound: Double) = this(k, 100, stopBound) 37 | 38 | /** 39 | * set the number of clusters 40 | * 41 | * @param k 42 | * @return this 43 | */ 44 | def setK(k: Int): this.type = { 45 | this.k = k 46 | this 47 | } 48 | 49 | /** 50 | * set the Max Iter 51 | * 52 | * @param maxIters 53 | * @return this 54 | */ 55 | def setMaxIters(maxIters: Int): this.type = { 56 | this.maxIters = maxIters 57 | this 58 | } 59 | 60 | /** 61 | * set the convergence bound 62 | * 63 | * @param stopBound 64 | * @return this 65 | */ 66 | def setStopBound(stopBound: Double): this.type = { 67 | this.stopBound = stopBound 68 | this 69 | } 70 | 71 | /** 72 | * set the init Centers 73 | * 74 | * @param initCenters 75 | * @return this 76 | */ 77 | def setInitCenters(initCenters: Array[(Vector, Double)]): this.type = { 78 | require(initCenters.length == k) 79 | this.initCenters = Some(initCenters) 80 | this 81 | } 82 | 83 | /** 84 | * Do K-Means train 85 | * 86 | * @param data 87 | * @tparam T 88 | * @return (KMeansModel,cost) 89 | */ 90 | def train[T](data: RDD[(T, Vector)]): (KMeansModel, Double) = { 91 | val centers = initCenters.getOrElse(initCenter(data)) 92 | 93 | val trainData = data.map(e => (e._2, e._2.norm2)).cache() 94 | val squareStopBound = stopBound * stopBound 95 | 96 | var isConvergence = false 97 | var i = 0 98 | val costs = data.sparkContext.doubleAccumulator 99 | 100 | while (!isConvergence && i < maxIters) { 101 | costs.reset() 102 | val br_centers = data.sparkContext.broadcast(centers) 103 | 104 | val res = trainData.mapPartitions { iter => 105 | val counts = new Array[Int](k) 106 | util.Arrays.fill(counts, 0) 107 | val partSum = (0 until k).map(e => new DenseVector(br_centers.value(0)._1.size)) 108 | 109 | 
iter.foreach { e => 110 | val (index, cost) = KMeans.findNearest(e, br_centers.value) 111 | costs.add(cost) 112 | counts(index) += 1 113 | partSum(index) += e._1 114 | } 115 | counts.indices.filter(j => counts(j) > 0).map(j => (j -> (partSum(j), counts(j)))).iterator 116 | }.reduceByKey { case ((s1, c1), (s2, c2)) => 117 | (s1 += s2, c1 + c2) 118 | }.collectAsMap() 119 | br_centers.unpersist(false) 120 | 121 | 122 | println(s"cost at iter: $i is: ${costs.value}") 123 | isConvergence = true 124 | res.foreach { case (index, (sum, count)) => 125 | sum /= count 126 | val sumNorm2 = sum.norm2() 127 | val squareDist = math.pow(centers(index)._2, 2.0) + math.pow(sumNorm2, 2.0) - 2 * (centers(index)._1 * sum) 128 | if (squareDist >= squareStopBound) { 129 | isConvergence = false 130 | } 131 | centers(index) = (sum, sumNorm2) 132 | } 133 | i += 1 134 | } 135 | (new KMeansModel(centers), costs.value) 136 | } 137 | 138 | 139 | private def initCenter[T](data: RDD[(T, Vector)]): Array[(Vector, Double)] = { 140 | data.takeSample(false, k, System.currentTimeMillis()) 141 | .map(_._2).distinct.map(e => (e, e.norm2)) 142 | } 143 | 144 | override def equals(other: Any): Boolean = other match { 145 | case that: KMeans => 146 | (that canEqual this) && 147 | initCenters == that.initCenters && 148 | k == that.k && 149 | maxIters == that.maxIters && 150 | stopBound == that.stopBound 151 | case _ => false 152 | } 153 | 154 | def canEqual(other: Any): Boolean = other.isInstanceOf[KMeans] 155 | 156 | override def hashCode(): Int = { 157 | val state = Seq(initCenters, k, maxIters, stopBound) 158 | state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b) 159 | } 160 | } 161 | 162 | object KMeans { 163 | def findNearest(e: (Vector, Double), centers: Array[(Vector, Double)]): (Int, Double) = { 164 | var cost = Double.MaxValue 165 | var index = 0; 166 | for (i <- 0 until centers.length) { 167 | val center = centers(i) 168 | if (math.pow(e._2 - center._2, 2.0) < cost) { 169 | val squarePart = 
math.pow(e._2, 2.0) + math.pow(center._2, 2.0) 170 | val squareDist = squarePart - 2 * (e._1 * center._1) 171 | if (squareDist < cost) { 172 | cost = squareDist 173 | index = i 174 | } 175 | } 176 | } 177 | (index, cost) 178 | } 179 | } 180 | 181 | 182 | class KMeansModel(centers: Array[(Vector, Double)]) extends Serializable { 183 | 184 | def clustering[T](data: RDD[(T, Vector)]): RDD[(T, Int)] = { 185 | val br_center = data.sparkContext.broadcast(centers) 186 | data.map { e => 187 | val res = KMeans.findNearest((e._2, e._2.norm2), br_center.value) 188 | (e._1, res._1) 189 | } 190 | 191 | } 192 | 193 | 194 | } 195 | 196 | 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /src/main/scala/features/Scaller.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.features 16 | 17 | import libble.linalg.implicits.vectorAdOps 18 | import libble.linalg.{DenseVector, SparseVector, Vector} 19 | import org.apache.spark.rdd.RDD 20 | import scala.beans.BeanProperty 21 | 22 | 23 | /** 24 | * With this class, we scale the data to standard normal space in feature-wise. 
25 | * @param centerlized 26 | * @param scalStd 27 | */ 28 | 29 | class Scaller(var centerlized: Boolean = false, var scalStd: Boolean = true) extends Serializable { 30 | @BeanProperty var center: Option[Vector] = None 31 | @BeanProperty var std: Option[Vector] = None 32 | 33 | /** 34 | * Compute center or std of the data. 35 | * @param data 36 | */ 37 | def computeFactor(data: RDD[Vector]): Unit = (centerlized, scalStd) match { 38 | case (true, false) => { 39 | center = Some(computeCenter(data)) 40 | 41 | } 42 | case (true, true) => { 43 | center = Some(computeCenter(data)) 44 | std = Some(coputeVariance(data)) 45 | } 46 | case (false, true) => { 47 | std = Some(coputeVariance(data)) 48 | } 49 | case (false, false) => { 50 | throw new IllegalArgumentException("you need not a scaller!!!") 51 | } 52 | 53 | 54 | } 55 | 56 | private def computeCenter(data: RDD[Vector]): Vector = { 57 | val n = data.first().size 58 | val (cum, num) = data.treeAggregate((new DenseVector(n), 0l))(seqOp = (c, v) => 59 | (c._1 += v, c._2 + 1), 60 | combOp = (c1, c2) => (c1._1 += c2._1, c1._2 + c2._2) 61 | ) 62 | cum /= num 63 | } 64 | 65 | private def coputeVariance(data: RDD[Vector]): Vector = centerlized match { 66 | case true => { 67 | val cen = center.get 68 | val n = cen.size 69 | val (total, num) = data.treeAggregate(new DenseVector(n), 0)(seqOp = (c, v) => { 70 | val temp = v - cen 71 | temp.bitwisePow(2.0) 72 | (c._1 += temp, c._2 + 1) 73 | }, combOp = (c1, c2) => { 74 | (c1._1 += c2._1, c1._2 + c2._2) 75 | 76 | }) 77 | total /= num 78 | total.bitwisePow(0.5) 79 | } 80 | case false => { 81 | val n = data.first().size 82 | val (total, num) = data.treeAggregate(new DenseVector(n), 0)(seqOp = (c, v) => { 83 | val temp = v.copy 84 | temp.bitwisePow(2.0) 85 | (c._1 += temp, c._2 + 1) 86 | }, combOp = (c1, c2) => { 87 | (c1._1 += c2._1, c1._2 + c2._2) 88 | 89 | }) 90 | total /= num 91 | total.bitwisePow(0.5) 92 | } 93 | } 94 | 95 | 96 | /** 97 | * Transform the data : RDD[Vector] with 
the factors. 98 | * @param data 99 | * @return 100 | */ 101 | def transform(data: RDD[Vector]): RDD[Vector] = { 102 | val panning: (Vector => Vector) = data.first match { 103 | case dv: DenseVector => panningD 104 | case sv: SparseVector => panningS 105 | } 106 | 107 | (centerlized, scalStd) match { 108 | case (true, false) => { 109 | if (center != None) { 110 | data.map(panning) 111 | } else { 112 | throw new IllegalAccessError("you should call computeFactor first!!!") 113 | } 114 | } 115 | case (true, true) => { 116 | if (center != None && std != None) { 117 | data.map(panning).map(scaling) 118 | } 119 | else { 120 | throw new IllegalAccessError("you should call computeFactor first!!!") 121 | } 122 | } 123 | case (false, true) => { 124 | if (std != None) { 125 | data.map(scaling) 126 | } 127 | else { 128 | throw new IllegalAccessError("you should call computeFactor first!!!") 129 | } 130 | } 131 | case (false, false) => { 132 | throw new IllegalArgumentException("you need not a scaller!!!") 133 | } 134 | } 135 | } 136 | 137 | /** 138 | * Transform the data : Vector with the factors. 
139 | */ 140 | def transform(data: Vector): Vector = { 141 | val panning: (Vector => Vector) = data match { 142 | case sv: SparseVector => panningS 143 | case dv: DenseVector => panningD 144 | } 145 | 146 | (centerlized, scalStd) match { 147 | case (true, false) => { 148 | if (center != None) { 149 | panning(data) 150 | } else { 151 | throw new IllegalAccessError("you should call computeFactor first!!!") 152 | } 153 | 154 | } 155 | case (true, true) => { 156 | if (center != None && std != None) { 157 | panning(data) 158 | scaling(data) 159 | } 160 | else { 161 | throw new IllegalAccessError("you should call computeFactor first!!!") 162 | } 163 | } 164 | case (false, true) => { 165 | if (std != None) { 166 | scaling(data) 167 | } 168 | else { 169 | throw new IllegalAccessError("you should call computeFactor first!!!") 170 | } 171 | } 172 | case (false, false) => { 173 | throw new IllegalArgumentException("you need not a scaller!!!") 174 | } 175 | } 176 | } 177 | 178 | private def panningS(vec: Vector): Vector = { 179 | vec - center.get 180 | } 181 | 182 | private def panningD(vec: Vector): Vector = { 183 | vec -= center.get 184 | } 185 | 186 | private def scaling(vec: Vector): Vector = { 187 | val s = std.get 188 | vec match { 189 | case de: DenseVector => { 190 | val eValues = de.values 191 | var offset = 0 192 | while (offset < eValues.length) { 193 | eValues(offset) /= s.apply(offset) 194 | offset += 1 195 | } 196 | de 197 | } 198 | case se: SparseVector => { 199 | val eIndices = se.indices 200 | val eValues = se.values 201 | var offset = 0 202 | while (offset < eValues.length) { 203 | eValues(offset) /= s.apply(eIndices(offset)) 204 | offset += 1 205 | } 206 | se 207 | } 208 | } 209 | } 210 | 211 | } 212 | -------------------------------------------------------------------------------- /src/main/scala/linalg/Vector.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by 
Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.linalg 16 | 17 | import java.util 18 | 19 | /** 20 | * This is the trait of Vector. 21 | */ 22 | sealed trait Vector extends Serializable { 23 | 24 | 25 | /** 26 | * Get the i-th element 27 | * 28 | * @param i 29 | * @return double 30 | */ 31 | def apply(i: Int): Double 32 | 33 | /** Length 34 | * 35 | * @return number of elements 36 | */ 37 | def size: Int 38 | 39 | /** 40 | * Return a copy of this. 41 | * 42 | * @return new copy 43 | */ 44 | def copy: Vector 45 | 46 | /** 47 | * Apply function on each item. 48 | * 49 | * @param f 50 | */ 51 | def foreachActive(f: (Int, Double) => Unit) 52 | 53 | /** 54 | * Return the number of nonzero elements. 55 | * 56 | * @return nnz 57 | */ 58 | def nnz: Int 59 | 60 | /** 61 | * Convert the vector to an array. 62 | * 63 | * @return array 64 | */ 65 | def toArray: Array[Double] 66 | 67 | 68 | def vector: Vector 69 | 70 | } 71 | 72 | /** 73 | * Class of Dense Vector. 74 | * 75 | * @param values 76 | */ 77 | case class DenseVector(val values: Array[Double]) extends Vector { 78 | 79 | /** 80 | * Initialize a DenseVector with all elements zero. 81 | * 82 | * @param size 83 | * @return 84 | */ 85 | def this(size: Int) = this { 86 | val temp = new Array[Double](size) 87 | util.Arrays.fill(temp, 0, size, 0.0) 88 | temp 89 | } 90 | 91 | /** 92 | * Return the i-th element. 
93 | * 94 | * @param i 95 | * @return double 96 | */ 97 | override def apply(i: Int): Double = values(i) 98 | 99 | 100 | /** 101 | * Return a copy of this. 102 | * 103 | * @return new copy 104 | */ 105 | override def copy: DenseVector = { 106 | new DenseVector(values.clone()) 107 | } 108 | 109 | /** 110 | * Return a copy of this vector. 111 | * 112 | * @return copy 113 | */ 114 | override def clone(): DenseVector = { 115 | copy 116 | } 117 | 118 | /** 119 | * Return the hashcode of this vector. 120 | * 121 | * @return 122 | */ 123 | override def hashCode(): Int = { 124 | var code = 0 125 | var offset = 0 126 | while (offset < 7) { 127 | val bits = java.lang.Double.doubleToLongBits(values(offset)) 128 | code = code * 13 + (bits ^ (bits >>> 32)).toInt 129 | offset += 1 130 | } 131 | code 132 | } 133 | 134 | /** 135 | * Return the number of nonzero elements. 136 | * 137 | * @return nnz 138 | */ 139 | override def nnz: Int = { 140 | var num = 0 141 | var offset = 0 142 | while (offset < values.length) { 143 | if (values(offset) != 0) 144 | num += 1 145 | offset += 1 146 | } 147 | num 148 | } 149 | 150 | /** Length 151 | * 152 | * @return number of elements 153 | */ 154 | override def size: Int = values.length 155 | 156 | /** 157 | * Convert the vector to an array. 158 | * 159 | * @return array 160 | */ 161 | override def toArray: Array[Double] = values 162 | 163 | /** 164 | * Convert this vector to a string. 165 | * 166 | * @return 167 | */ 168 | override def toString(): String = { 169 | values.mkString("[", ",", "]") 170 | } 171 | 172 | /** 173 | * Apply function on each item. 174 | * 175 | * @param f 176 | */ 177 | override def foreachActive(f: (Int, Double) => Unit): Unit = { 178 | var offset = 0 179 | while (offset < size) { 180 | f(offset, values(offset)) 181 | offset += 1 182 | } 183 | } 184 | 185 | override def vector: Vector = this 186 | } 187 | 188 | /** 189 | * Class of the Sparse Vector. 
190 | * 191 | * @param indices 192 | * @param values 193 | * @param dim 194 | */ 195 | case class SparseVector(val indices: Array[Int], val values: Array[Double], dim: Int) extends Vector { 196 | require(indices.length == values.length && indices.length <= size, "length of indices doesn't match actual !") 197 | 198 | 199 | /** 200 | * Return the active size of element. 201 | * 202 | * @return active size 203 | */ 204 | def activeSize: Int = indices.length 205 | 206 | /** 207 | * get the i-th element of this vector. 208 | * 209 | * @param i 210 | * @return double 211 | */ 212 | override def apply(i: Int): Double = { 213 | var offset = 0 214 | while (indices(offset) < i) { 215 | offset += 1 216 | } 217 | if (indices(offset) == i) { 218 | values(offset) 219 | } else { 220 | 0.0 221 | } 222 | } 223 | 224 | /** 225 | * Return a copy of this. 226 | * 227 | * @return new copy 228 | */ 229 | override def copy(): SparseVector = { 230 | new SparseVector(indices.clone(), values.clone(), dim) 231 | } 232 | 233 | /** 234 | * Return a copy of this vector. 235 | * 236 | * @return copy 237 | */ 238 | override def clone(): SparseVector = { 239 | copy() 240 | } 241 | 242 | /** 243 | * Return the hashcode of this vector. 244 | * 245 | * @return Int hashcode 246 | */ 247 | override def hashCode(): Int = { 248 | var code = size * indices.length 249 | var offset = 0 250 | while (offset < values.size && offset < 7) { 251 | val bits = java.lang.Double.doubleToLongBits(values(offset)) 252 | code = code * 13 + indices(offset) * (bits ^ (bits >>> 32)).toInt 253 | offset += 1 254 | } 255 | code 256 | } 257 | 258 | /** 259 | * Return the number of nonzero elements. 
260 | * 261 | * @return nnz 262 | */ 263 | override def nnz: Int = { 264 | var num = 0 265 | var offset = 0 266 | while (offset < values.length) { 267 | if (values(offset) != 0) 268 | num += 1 269 | offset += 1 270 | } 271 | num 272 | } 273 | 274 | /** 275 | * Length 276 | * 277 | * @return number of elements 278 | */ 279 | override def size: Int = dim 280 | 281 | /** 282 | * Convert the vector to an array. 283 | * 284 | * @return array 285 | */ 286 | override def toArray: Array[Double] = { 287 | val data = new Array[Double](size) 288 | util.Arrays.fill(data, 0, size, 0.0) 289 | var offset = 0 290 | while (offset < activeSize) { 291 | data(indices(offset)) = values(offset) 292 | offset += 1 293 | } 294 | data 295 | } 296 | 297 | /** 298 | * Convert the vector to a string. 299 | * 300 | * @return string 301 | */ 302 | override def toString: String = { 303 | s"$size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")}" 304 | } 305 | 306 | /** 307 | * Apply function on each item. 308 | * 309 | * @param f 310 | */ 311 | override def foreachActive(f: (Int, Double) => Unit): Unit = { 312 | var offset = 0 313 | while (offset < activeSize) { 314 | f(indices(offset), values(offset)) 315 | offset += 1 316 | } 317 | } 318 | 319 | override def vector: Vector = this 320 | } 321 | 322 | -------------------------------------------------------------------------------- /src/main/scala/dimReduction/GLS_Matrix_Batch.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 3 | * All Rights Reserved. 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. */ 15 | package libble.dimReduction 16 | 17 | import java.util.Calendar 18 | 19 | import libble.linalg.implicits._ 20 | import libble.linalg.{DenseVector, Vector} 21 | import org.apache.spark.rdd.RDD 22 | 23 | import scala.collection.mutable.ArrayBuffer 24 | import scala.util.Random 25 | 26 | 27 | /** 28 | * 29 | * This class is the Generalized Linear Algorithms for PCA model which uses mini-batch strategy during optimization process. 30 | * 31 | * @param stepSize 32 | * @param regParam 33 | * @param factor 34 | * @param iters 35 | * @param parts 36 | * @param batchSize 37 | * @param K 38 | */ 39 | class GLS_Matrix_Batch(var stepSize: Double, 40 | var regParam: Double, 41 | var factor: Double, 42 | var iters: Int, 43 | var parts: Int, 44 | var batchSize: Int, 45 | var K: Int) extends Serializable { 46 | def this() = this(1.0, 0.0001, 0.0001, 5, 2, 1, 1) 47 | 48 | private[this] var stopBound: Double = 0.0 49 | var weightsVector: Option[Vector] = None 50 | 51 | /** 52 | * Set the stop bound. 53 | * 54 | * @param value 55 | * @return this.type 56 | */ 57 | def setStopBound(value: Double): this.type = { 58 | stopBound = value 59 | this 60 | } 61 | 62 | /** 63 | * Train the model on training data. 
64 | * 65 | * @param input 66 | * @return principle components and loss array 67 | */ 68 | def train(input: RDD[Vector]): (Array[Vector], Array[Double]) = { 69 | val dims = input.first().size 70 | val W0 = new Array[Vector](K) 71 | for (i <- 0 to K - 1) { 72 | val arr = new Array[Double](dims) 73 | for (j <- arr.indices) 74 | arr(j) = Random.nextGaussian() 75 | W0(i) = new DenseVector(arr.clone()) 76 | val n = W0(i).norm2() 77 | W0(i) /= n 78 | } 79 | 80 | train(input, W0) 81 | } 82 | 83 | 84 | /** 85 | * Train on training data with initial weights. 86 | * 87 | * @param input 88 | * @param initialWs 89 | * @return principle components and loss array 90 | */ 91 | def train(input: RDD[Vector], initialWs: Array[Vector]): (Array[Vector], Array[Double]) = { 92 | if (parts == (-1)) parts = input.partitions.length 93 | val data = { 94 | if (parts == input.partitions.length) 95 | input.cache() 96 | else 97 | input.coalesce(parts, true).cache() 98 | } 99 | runEngine(data, initialWs) 100 | } 101 | 102 | 103 | /** 104 | * The PCA optimization engine. 
105 | * 106 | * @param data 107 | * @param initialWs 108 | * @return 109 | */ 110 | private[this] def runEngine(data: RDD[Vector], initialWs: Array[Vector]): (Array[Vector], Array[Double]) = { 111 | 112 | val K = initialWs.length 113 | val count = data.count() 114 | var weights = new Array[Vector](K) 115 | for (k <- 0 to K - 1) 116 | weights(k) = initialWs(k).copy 117 | val n = weights(0).size 118 | var convergenced = false 119 | 120 | val startTime = Calendar.getInstance().getTimeInMillis 121 | 122 | /** 123 | * outer loop 124 | */ 125 | val lossArray = ArrayBuffer[Double]() 126 | var i = 0 127 | var time = 0l 128 | 129 | while (i < iters && !convergenced) { 130 | 131 | val w = data.context.broadcast(weights) 132 | var time = Calendar.getInstance().getTimeInMillis 133 | val temp = new Array[Vector](K) 134 | for (k <- 0 to K - 1) 135 | temp(k) = new DenseVector(n) 136 | 137 | val (mu, lossTotal, diag) = data.treeAggregate(temp, 0.0, new Array[Double](K))( 138 | seqOp = (c, v) => { 139 | var lossTemp = 0.0 140 | for (k <- 0 to K - 1) { 141 | val inner = v * w.value(k) 142 | val loss = -1.0 * inner * inner 143 | c._1(k).plusax(inner, v) 144 | c._3(k) += loss 145 | lossTemp += loss 146 | } 147 | (c._1, c._2 + lossTemp, c._3) 148 | }, 149 | combOp = (c1, c2) => { 150 | for (k <- 0 to K - 1) { 151 | c2._1(k) += c1._1(k) 152 | c2._3(k) += c1._3(k) 153 | } 154 | (c2._1, c1._2 + c2._2, c2._3) 155 | } 156 | ) 157 | for (k <- 0 to K - 1) 158 | mu(k) /= count.toDouble 159 | 160 | val loss = lossTotal / count.toDouble 161 | println(s"$loss ${time - startTime} ") 162 | for (k <- 0 to K - 1) 163 | println(diag(k) / count.toDouble) 164 | println() 165 | lossArray += loss 166 | 167 | 168 | val temp2 = new Array[Vector](K) 169 | for (k <- 0 to K - 1) 170 | temp2(k) = new DenseVector(n) 171 | 172 | val w_0 = data.context.broadcast(weights) 173 | val weightsAll = data.mapPartitions({ iter => 174 | val omiga = new Array[Vector](K) 175 | for (k <- 0 to K - 1) 176 | omiga(k) = 
w_0.value(k).copy 177 | val indexSeq = iter.toIndexedSeq 178 | val pNum = indexSeq.size 179 | 180 | /** 181 | * inner loop 182 | */ 183 | for (j <- 1 to pNum / batchSize) { 184 | 185 | val delta = new Array[Vector](K) 186 | for (k <- 0 to K - 1) 187 | delta(k) = new DenseVector(n) 188 | 189 | for (b <- 1 to batchSize) { 190 | val e = indexSeq(Random.nextInt(pNum)) 191 | for (k <- 0 to K - 1) { 192 | val f1 = e * omiga(k) 193 | val f2 = e * w_0.value(k) 194 | delta(k).plusax(f1 - f2, e) 195 | } 196 | } 197 | 198 | for (k <- 0 to K - 1) { 199 | delta(k) /= batchSize 200 | delta(k) += mu(k) 201 | omiga(k).plusax(stepSize, delta(k)) 202 | } 203 | 204 | GramSchmidt(omiga) 205 | } 206 | Iterator(omiga) 207 | }, true) 208 | .treeAggregate(temp2)(seqOp = (c, w) => { 209 | for (k <- 0 to K - 1) 210 | c(k) += w(k) 211 | c 212 | }, combOp = { (w1, w2) => 213 | for (k <- 0 to K - 1) 214 | w1(k) += w2(k) 215 | w1 216 | }) 217 | 218 | for (k <- 0 to K - 1) 219 | weightsAll(k) /= parts.toDouble 220 | 221 | GramSchmidt(weightsAll) 222 | 223 | weights = weightsAll 224 | 225 | if (i >= 2) 226 | convergenced = isConvergenced(lossArray) 227 | i += 1 228 | time = Calendar.getInstance().getTimeInMillis 229 | } 230 | println(s"losses of the last 10 iteration are:${lossArray.takeRight(5).mkString(",")}") 231 | 232 | (weights, lossArray.toArray) 233 | 234 | } 235 | 236 | /** 237 | * Judge whether the convergence condition is satisfied. 238 | * 239 | * @param lossArray 240 | * @return Boolean 241 | */ 242 | private[this] def isConvergenced(lossArray: ArrayBuffer[Double]): Boolean = { 243 | val len = lossArray.length 244 | (math.abs(lossArray(len - 1) - lossArray(len - 2)) < stopBound) && (lossArray(len - 1) < lossArray(len - 2)) 245 | } 246 | 247 | /** 248 | * 249 | * This method is the implementation of GramSchmidt orthonormalization which is invoked in each inner loop. 
250 | * 251 | * @param weights 252 | */ 253 | def GramSchmidt(weights: Array[Vector]): Unit = { 254 | val beta = new Array[Vector](K) 255 | for (k <- 0 to K - 1) { 256 | weights(k) /= parts.toDouble 257 | beta(k) = weights(k).copy 258 | for (j <- 0 to k - 1) { 259 | val xishu = (beta(j) * weights(k)) / (beta(j) * beta(j)) 260 | beta(k).plusax(-1.0 * xishu, beta(j)) 261 | } 262 | } 263 | for (k <- 0 to K - 1) { 264 | val normk = beta(k).norm2() 265 | beta(k) /= normk 266 | weights(k) = beta(k).copy 267 | } 268 | } 269 | 270 | } 271 | -------------------------------------------------------------------------------- /.idea/uiDesigner.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /src/main/scala/collaborativeFiltering/MatrixFactorization.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University. 4 | * All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * You may not use this file except in compliance with the License. 
7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | * 16 | */ 17 | 18 | /** 19 | * Created by syh on 2016/12/9. 20 | */ 21 | 22 | package libble.collaborativeFiltering 23 | 24 | import libble.linalg.implicits._ 25 | import libble.linalg.{DenseVector, Vector} 26 | import libble.utils.{XORShiftRandom, WorkerStore} 27 | import org.apache.spark.rdd.RDD 28 | 29 | import scala.collection.mutable.ArrayBuffer 30 | import scala.util.hashing.byteswap64 31 | 32 | case class Rating(rating: Double, index_x: Int, index_y: Int) 33 | 34 | /** 35 | * This is an acceleration version of matrix factorization, 36 | * but it require that numParts equal to the actual number of machines. 37 | */ 38 | class MatrixFactorization extends Serializable{ 39 | /** 40 | * initialize the user factors and item factors randomly 41 | * 42 | * @param indices user(item) indices 43 | * @param rank the length of factor 44 | * @return 45 | */ 46 | def initialize(indices: Set[Int], rank :Int) : Map[Int, Vector]= { 47 | val seedGen = new XORShiftRandom() 48 | val random = new XORShiftRandom(byteswap64(seedGen.nextLong())) 49 | val vectors = new Array[Vector](indices.size) 50 | for (i <- vectors.indices) { 51 | val factors = Array.fill(rank)(random.nextGaussian()) 52 | val v = new DenseVector(factors) 53 | v /= v.norm2() 54 | vectors(i) = v 55 | } 56 | indices.zip(vectors).toMap 57 | } 58 | 59 | /** 60 | * This is an acceleration version of matrix factorization, 61 | * but it require that numParts equal to the actual number of machines. 
62 | * 63 | * @param trainSet RDD of ratings 64 | * @param numIters number of outer loop 65 | * @param numParts number of workers 66 | * @param rank length of factor 67 | * @param lambda_u regularization parameter of users 68 | * @param lambda_v regularization parameter of items 69 | * @param stepSize stepsize for update the factors. 70 | * @return matrix factorization model 71 | */ 72 | def train (trainSet: RDD[Rating], 73 | numIters: Int, 74 | numParts: Int, 75 | rank: Int, 76 | lambda_u: Double, 77 | lambda_v: Double, 78 | stepSize: Double, 79 | ifPrintLoss: Int) : MatrixFactorizationModel = { 80 | var stepsize = stepSize 81 | val items = trainSet.mapPartitions{iter => 82 | val is = iter.map(r => r.index_y).toSet 83 | Iterator.single(is) 84 | } 85 | .reduce((a,b)=> a.union(b)) 86 | val numRatings = trainSet.count() 87 | //random hash the data by row 88 | val ratingsByRow = trainSet.groupBy(_.index_x) 89 | .repartition(numParts) 90 | .values 91 | .flatMap(i=>i) 92 | .cache() 93 | //number of inner iterations is the maximum number of ratings in p workers 94 | val numInnerIters = ratingsByRow.mapPartitions(i => Iterator.single(i.length)).reduce((a,b)=>math.max(a,b)) 95 | 96 | //initialize item factors in master 97 | var itemFactors = initialize(items, rank) 98 | //initialize U in p workers 99 | ratingsByRow.mapPartitionsWithIndex{(index,iter) => 100 | val indices_x = iter.map(r => r.index_x).toSet 101 | val userFactors = initialize(indices_x,rank) 102 | MatrixFactorization.workerstore.put(s"userFactors_$index", userFactors) 103 | Iterator.single(0) 104 | }.count() 105 | //main loop 106 | val startTime = System.currentTimeMillis() 107 | val lossList = new ArrayBuffer[Double]() 108 | var testTime = 0L 109 | var i = 0 110 | while (i < numIters){ 111 | if(ifPrintLoss == 1){ 112 | //loss 113 | val testTimeStart = System.currentTimeMillis() 114 | val bc_test_itemFactors = ratingsByRow.context.broadcast(itemFactors) 115 | //training loss 116 | val loss = 
ratingsByRow.mapPartitions {iter => 117 | val localV = bc_test_itemFactors.value 118 | val localU = MatrixFactorization.workerstore.get[Map[Int, Vector]]("userFactors") 119 | val reguV = localV.mapValues(v => lambda_v * v.dot(v)) 120 | val reguU = localU.mapValues(u => lambda_u * u.dot(u)) 121 | val ls = iter.foldLeft(0.0) { (l, r) => 122 | val uh = localU.get(r.index_x).get 123 | val vj = localV.get(r.index_y).get 124 | val residual = r.rating - uh.dot(vj) 125 | l + residual * residual + reguU.get(r.index_x).get + reguV.get(r.index_y).get 126 | } 127 | Iterator.single(ls) 128 | }.reduce(_ + _) / numRatings 129 | bc_test_itemFactors.unpersist() 130 | print(s"$loss\t") 131 | testTime += (System.currentTimeMillis() - testTimeStart) 132 | println(s"${System.currentTimeMillis() - testTime - startTime}") 133 | } 134 | //broadcast V to p workers 135 | val bc_itemFactors = ratingsByRow.context.broadcast(itemFactors) 136 | //for each woker i parallelly do 137 | val (newItemFactors, lossSum) = ratingsByRow.mapPartitionsWithIndex{case(index,iter) => 138 | val localRatings = iter.toArray 139 | val numLocalRatings = localRatings.length 140 | val localV = bc_itemFactors.value 141 | val localU = MatrixFactorization.workerstore.get[Map[Int, Vector]](s"userFactors_$index") 142 | val seedGen = new XORShiftRandom() 143 | val random = new XORShiftRandom(byteswap64(seedGen.nextLong() ^ index)) 144 | var loss = 0.0 145 | //inner loop 146 | for(i <- 1 to numInnerIters){ 147 | //randomly select an instance r_h,k from R_i 148 | val ranRating = localRatings(random.nextInt(numLocalRatings)) 149 | val uh = localU.get(ranRating.index_x).get 150 | val vj = localV.get(ranRating.index_y).get 151 | //update uh 152 | val residual = ranRating.rating - uh.dot(vj) 153 | uh *= (1- stepsize * lambda_u) 154 | uh.plusax(stepsize * residual, vj) 155 | } 156 | for(i <- 1 to numInnerIters){ 157 | //randomly select an instance r_h,k from R_i 158 | val ranRating = localRatings(random.nextInt(numLocalRatings)) 
159 | val uh = localU.get(ranRating.index_x).get 160 | val vj = localV.get(ranRating.index_y).get 161 | //update vj 162 | val residual = ranRating.rating - uh.dot(vj) 163 | val rrr = stepsize * residual 164 | vj *= (1 - stepsize * lambda_v) 165 | vj.plusax(stepsize * residual, uh) 166 | loss += (residual * residual) 167 | } 168 | Iterator.single((bc_itemFactors.value, loss)) 169 | } 170 | .reduce { (a, b) => 171 | val temp = a._1 172 | b._1.foreach{case (i, v) => 173 | v.plusax(1.0, temp.get(i).get) 174 | } 175 | (b._1, a._2 + b._2) 176 | } 177 | itemFactors = newItemFactors 178 | itemFactors.foreach(ui => ui._2 /= numParts.toDouble) 179 | bc_itemFactors.unpersist() 180 | 181 | val approxLoss = lossSum / (numParts * numInnerIters) 182 | if (i != 0) { 183 | val oldLoss = lossList.last 184 | if (approxLoss > oldLoss) 185 | stepsize = stepsize * 0.5 186 | else 187 | stepsize *= 1.05 188 | } 189 | lossList.append(approxLoss) 190 | 191 | // println(s"approximate loss: $approxLoss, time: ${System.currentTimeMillis() - startTime}") 192 | 193 | i += 1 194 | } 195 | val trainOver = System.currentTimeMillis() 196 | val bc_test_itemFactors = ratingsByRow.context.broadcast(itemFactors) 197 | val loss = ratingsByRow.mapPartitionsWithIndex { (index,iter )=> 198 | val localV = bc_test_itemFactors.value 199 | val localU = MatrixFactorization.workerstore.get[Map[Int, Vector]](s"userFactors_$index") 200 | val reguV = localV.mapValues(v => lambda_v * v.dot(v)) 201 | val reguU = localU.mapValues(u => lambda_u * u.dot(u)) 202 | val ls = iter.foldLeft(0.0) { (l, r) => 203 | val uh = localU.get(r.index_x).get 204 | val vj = localV.get(r.index_y).get 205 | val residual = r.rating - uh.dot(vj) 206 | l + residual * residual + reguU.get(r.index_x).get + reguV.get(r.index_y).get 207 | } 208 | Iterator.single(ls) 209 | } 210 | .reduce(_ + _) / numRatings 211 | bc_test_itemFactors.unpersist() 212 | println(s"loss: $loss\t") 213 | println(s"cputime of training process(ms): ${ trainOver - 
startTime }") 214 | 215 | val userFactorsRDD = ratingsByRow.mapPartitionsWithIndex{(index,iter) => 216 | val factors = MatrixFactorization.workerstore.get[Map[Int, Vector]](s"userFactors_$index") 217 | factors.toIterator 218 | }.cache() 219 | val itemFactorsRDD = ratingsByRow.context.parallelize(itemFactors.toSeq, numParts).cache() 220 | new MatrixFactorizationModel(rank, userFactorsRDD, itemFactorsRDD) 221 | } 222 | } 223 | 224 | object MatrixFactorization { 225 | val workerstore = new WorkerStore() 226 | } 227 | --------------------------------------------------------------------------------