├── .idea
├── .name
├── copyright
│ ├── profiles_settings.xml
│ └── libble.xml
├── encodings.xml
├── vcs.xml
├── scala_compiler.xml
├── libraries
│ ├── Maven__oro_oro_2_0_8.xml
│ ├── Maven__junit_junit_4_12.xml
│ ├── Maven__org_tukaani_xz_1_0.xml
│ ├── Maven__log4j_log4j_1_2_17.xml
│ ├── Maven__xmlenc_xmlenc_0_52.xml
│ ├── Maven__net_sf_py4j_py4j_0_10_3.xml
│ ├── Maven__org_apache_ivy_ivy_2_4_0.xml
│ ├── Maven__net_jpountz_lz4_lz4_1_3_0.xml
│ ├── Maven__org_apache_avro_avro_1_7_7.xml
│ ├── Maven__com_google_inject_guice_3_0.xml
│ ├── Maven__net_razorvine_pyrolite_4_9.xml
│ ├── Maven__net_sf_opencsv_opencsv_2_3.xml
│ ├── Maven__commons_io_commons_io_2_1.xml
│ ├── Maven__org_objenesis_objenesis_2_1.xml
│ ├── Maven__aopalliance_aopalliance_1_0.xml
│ ├── Maven__com_google_guava_guava_14_0_1.xml
│ ├── Maven__commons_cli_commons_cli_1_2.xml
│ ├── Maven__commons_net_commons_net_2_2.xml
│ ├── Maven__io_netty_netty_3_8_0_Final.xml
│ ├── Maven__javax_inject_javax_inject_1.xml
│ ├── Maven__org_scala_lang_scalap_2_11_8.xml
│ ├── Maven__org_slf4j_slf4j_api_1_7_21.xml
│ ├── Maven__org_jpmml_pmml_model_1_2_15.xml
│ ├── Maven__com_ning_compress_lzf_1_0_3.xml
│ ├── Maven__com_twitter_chill_2_11_0_8_0.xml
│ ├── Maven__com_twitter_chill_java_0_8_0.xml
│ ├── Maven__commons_lang_commons_lang_2_5.xml
│ ├── Maven__net_java_dev_jets3t_jets3t_0_7_1.xml
│ ├── Maven__org_apache_avro_avro_ipc_1_7_7.xml
│ ├── Maven__org_codehaus_janino_janino_2_7_8.xml
│ ├── Maven__org_jpmml_pmml_schema_1_2_15.xml
│ ├── Maven__com_esotericsoftware_minlog_1_3_0.xml
│ ├── Maven__org_hamcrest_hamcrest_core_1_3.xml
│ ├── Maven__com_github_fommil_netlib_core_1_1_2.xml
│ ├── Maven__commons_codec_commons_codec_1_3.xml
│ ├── Maven__org_slf4j_jul_to_slf4j_1_7_16.xml
│ ├── Maven__org_spire_math_spire_2_11_0_7_4.xml
│ ├── Maven__com_github_rwl_jtransforms_2_4_0.xml
│ ├── Maven__org_antlr_antlr4_runtime_4_5_3.xml
│ ├── Maven__org_scalanlp_breeze_2_11_0_11_2.xml
│ ├── Maven__org_slf4j_slf4j_log4j12_1_7_21.xml
│ ├── Maven__org_scala_sbt_test_interface_1_0.xml
│ ├── Maven__org_spark_project_spark_unused_1_0_0.xml
│ ├── Maven__com_google_code_findbugs_jsr305_1_3_9.xml
│ ├── Maven__io_netty_netty_all_4_0_29_Final.xml
│ ├── Maven__org_apache_avro_avro_ipc_tests_1_7_7.xml
│ ├── Maven__org_slf4j_jcl_over_slf4j_1_7_16.xml
│ ├── Maven__com_clearspring_analytics_stream_2_7_0.xml
│ ├── Maven__com_novocode_junit_interface_0_11.xml
│ ├── Maven__org_apache_commons_commons_math_2_1.xml
│ ├── Maven__org_apache_hadoop_hadoop_auth_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_hdfs_2_2_0.xml
│ ├── Maven__org_apache_zookeeper_zookeeper_3_4_5.xml
│ ├── Maven__org_glassfish_hk2_hk2_api_2_4_0_b34.xml
│ ├── Maven__org_javassist_javassist_3_18_1_GA.xml
│ ├── Maven__org_mortbay_jetty_jetty_util_6_1_26.xml
│ ├── Maven__javax_ws_rs_javax_ws_rs_api_2_0_1.xml
│ ├── Maven__org_apache_commons_commons_math3_3_2.xml
│ ├── Maven__org_apache_mesos_mesos_shaded_protobuf_0_21_1.xml
│ ├── Maven__org_json4s_json4s_ast_2_11_3_2_11.xml
│ ├── Maven__org_scala_lang_scala_actors_2_11_8.xml
│ ├── Maven__org_scalatest_scalatest_2_11_2_2_6.xml
│ ├── Maven__org_scala_lang_scala_reflect_2_11_8.xml
│ ├── Maven__com_esotericsoftware_kryo_shaded_3_0_3.xml
│ ├── Maven__com_thoughtworks_paranamer_paranamer_2_6.xml
│ ├── Maven__org_apache_avro_avro_mapred_hadoop2_1_7_7.xml
│ ├── Maven__org_json4s_json4s_core_2_11_3_2_11.xml
│ ├── Maven__commons_digester_commons_digester_1_8.xml
│ ├── Maven__io_dropwizard_metrics_metrics_jvm_3_1_2.xml
│ ├── Maven__org_apache_hadoop_hadoop_client_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_common_2_2_0.xml
│ ├── Maven__org_apache_spark_spark_sql_2_11_2_0_1.xml
│ ├── Maven__org_glassfish_hk2_hk2_utils_2_4_0_b34.xml
│ ├── Maven__org_scala_lang_scala_compiler_2_11_8.xml
│ ├── Maven__org_xerial_snappy_snappy_java_1_1_2_6.xml
│ ├── Maven__org_apache_commons_commons_lang3_3_3_2.xml
│ ├── Maven__org_roaringbitmap_RoaringBitmap_0_5_11.xml
│ ├── Maven__com_google_protobuf_protobuf_java_2_5_0.xml
│ ├── Maven__com_univocity_univocity_parsers_2_1_1.xml
│ ├── Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml
│ ├── Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml
│ ├── Maven__javax_servlet_javax_servlet_api_3_1_0.xml
│ ├── Maven__org_apache_curator_curator_client_2_4_0.xml
│ ├── Maven__org_apache_parquet_parquet_column_1_7_0.xml
│ ├── Maven__org_apache_parquet_parquet_common_1_7_0.xml
│ ├── Maven__org_apache_parquet_parquet_hadoop_1_7_0.xml
│ ├── Maven__org_apache_spark_spark_core_2_11_2_0_1.xml
│ ├── Maven__org_apache_spark_spark_tags_2_11_2_0_1.xml
│ ├── Maven__org_apache_xbean_xbean_asm5_shaded_4_4.xml
│ ├── Maven__org_apache_hadoop_hadoop_yarn_api_2_2_0.xml
│ ├── Maven__org_apache_spark_spark_mllib_2_11_2_0_1.xml
│ ├── Maven__org_glassfish_hk2_hk2_locator_2_4_0_b34.xml
│ ├── Maven__org_spire_math_spire_macros_2_11_0_7_4.xml
│ ├── Maven__org_apache_curator_curator_recipes_2_4_0.xml
│ ├── Maven__org_apache_parquet_parquet_jackson_1_7_0.xml
│ ├── Maven__org_json4s_json4s_jackson_2_11_3_2_11.xml
│ ├── Maven__org_scalanlp_breeze_macros_2_11_0_11_2.xml
│ ├── Maven__commons_httpclient_commons_httpclient_3_1.xml
│ ├── Maven__org_apache_commons_commons_compress_1_4_1.xml
│ ├── Maven__org_apache_parquet_parquet_encoding_1_7_0.xml
│ ├── Maven__org_apache_spark_spark_graphx_2_11_2_0_1.xml
│ ├── Maven__org_apache_spark_spark_sketch_2_11_2_0_1.xml
│ ├── Maven__org_apache_spark_spark_unsafe_2_11_2_0_1.xml
│ ├── Maven__commons_beanutils_commons_beanutils_1_7_0.xml
│ ├── Maven__org_codehaus_janino_commons_compiler_2_7_8.xml
│ ├── Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml
│ ├── Maven__org_scala_lang_modules_scala_xml_2_11_1_0_4.xml
│ ├── Maven__com_fasterxml_jackson_core_jackson_core_2_6_5.xml
│ ├── Maven__javax_annotation_javax_annotation_api_1_2.xml
│ ├── Maven__org_apache_curator_curator_framework_2_4_0.xml
│ ├── Maven__org_apache_parquet_parquet_generator_1_7_0.xml
│ ├── Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml
│ ├── Maven__net_sourceforge_f2j_arpack_combined_all_0_1.xml
│ ├── Maven__org_apache_hadoop_hadoop_annotations_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_yarn_client_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_yarn_common_2_2_0.xml
│ ├── Maven__org_apache_spark_spark_catalyst_2_11_2_0_1.xml
│ ├── Maven__org_apache_spark_spark_launcher_2_11_2_0_1.xml
│ ├── Maven__org_codehaus_jackson_jackson_core_asl_1_9_13.xml
│ ├── Maven__org_glassfish_jersey_core_jersey_client_2_22_2.xml
│ ├── Maven__org_glassfish_jersey_core_jersey_common_2_22_2.xml
│ ├── Maven__org_glassfish_jersey_core_jersey_server_2_22_2.xml
│ ├── Maven__javax_validation_validation_api_1_1_0_Final.xml
│ ├── Maven__org_apache_spark_spark_streaming_2_11_2_0_1.xml
│ ├── Maven__commons_collections_commons_collections_3_2_1.xml
│ ├── Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_13.xml
│ ├── Maven__org_glassfish_hk2_osgi_resource_locator_1_0_1.xml
│ ├── Maven__com_fasterxml_jackson_core_jackson_databind_2_6_5.xml
│ ├── Maven__commons_configuration_commons_configuration_1_6.xml
│ ├── Maven__org_apache_spark_spark_mllib_local_2_11_2_0_1.xml
│ ├── Maven__org_glassfish_hk2_external_javax_inject_2_4_0_b34.xml
│ ├── Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml
│ ├── Maven__org_glassfish_jersey_media_jersey_media_jaxb_2_22_2.xml
│ ├── Maven__com_fasterxml_jackson_core_jackson_annotations_2_6_5.xml
│ ├── Maven__org_apache_spark_spark_network_common_2_11_2_0_1.xml
│ ├── Maven__org_apache_hadoop_hadoop_yarn_server_common_2_2_0.xml
│ ├── Maven__org_apache_spark_spark_network_shuffle_2_11_2_0_1.xml
│ ├── Maven__org_apache_parquet_parquet_format_2_3_0_incubating.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_app_2_2_0.xml
│ ├── Maven__org_glassfish_jersey_bundles_repackaged_jersey_guava_2_22_2.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_core_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_common_2_2_0.xml
│ ├── Maven__com_fasterxml_jackson_module_jackson_module_paranamer_2_6_5.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_shuffle_2_2_0.xml
│ ├── Maven__org_glassfish_hk2_external_aopalliance_repackaged_2_4_0_b34.xml
│ ├── Maven__com_fasterxml_jackson_module_jackson_module_scala_2_11_2_6_5.xml
│ ├── Maven__org_scala_lang_modules_scala_parser_combinators_2_11_1_0_4.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_jobclient_2_2_0.xml
│ ├── Maven__org_glassfish_jersey_containers_jersey_container_servlet_2_22_2.xml
│ ├── Maven__org_glassfish_jersey_containers_jersey_container_servlet_core_2_22_2.xml
│ └── Maven__org_scala_lang_scala_library_2_11_8.xml
├── modules.xml
├── misc.xml
├── compiler.xml
└── uiDesigner.xml
├── README.md
├── data
└── testMF.txt
└── src
├── main
└── scala
│ ├── utils
│ ├── WorkerStore.scala
│ └── XORShiftRandom.scala
│ ├── linalg
│ ├── package.scala
│ └── Vector.scala
│ ├── context
│ ├── Instance.scala
│ └── implicits.scala
│ ├── examples
│ ├── LoadFile.scala
│ ├── testScaller.scala
│ ├── testKMeans.scala
│ ├── testLR.scala
│ ├── testSVD.scala
│ ├── testPCA.scala
│ └── testCF.scala
│ ├── regression
│ ├── Lasso.scala
│ └── LinearRegression.scala
│ ├── collaborativeFiltering
│ ├── MatrixFactorizationModel.scala
│ └── MatrixFactorization.scala
│ ├── classification
│ ├── SVM.scala
│ └── LogisticRegression.scala
│ ├── dimReduction
│ ├── SVD.scala
│ ├── PCA.scala
│ └── GLS_Matrix_Batch.scala
│ ├── generalizedLinear
│ └── Regularizer.scala
│ ├── clustering
│ └── KMeans.scala
│ └── features
│ └── Scaller.scala
└── test
└── scala
└── linalg
└── VectorsOpt.scala
/.idea/.name:
--------------------------------------------------------------------------------
1 | libble-spark
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LIBBLE-Spark
2 |
3 | LIBBLE-Spark is a library for big data machine learning on Spark. Please visit http://www.libble.ml for more details.
4 |
--------------------------------------------------------------------------------
/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/data/testMF.txt:
--------------------------------------------------------------------------------
1 | 1,1,3.0
2 | 1,2,4.0
3 | 1,3,2.8
4 | 1,4,4.0
5 | 1,5,3.7
6 | 1,6,4.7
7 | 2,1,2.0
8 | 2,2,5.0
9 | 2,3,4.8
10 | 2,4,2.6
11 | 2,5,4.2
12 | 2,6,3.0
13 | 3,1,4.3
14 | 3,2,3.2
15 | 3,3,5.0
16 | 3,4,4.9
17 | 3,5,3.2
18 | 3,6,4.0
19 | 4,1,3.0
20 | 4,2,4.3
21 | 4,3,4.3
22 | 4,4,1.0
23 | 4,5,3.2
24 | 4,6,2.3
25 | 5,1,4.0
26 | 5,2,4.3
27 | 5,3,4.5
28 | 5,4,2.3
29 | 5,5,2.0
30 | 5,6,1.0
--------------------------------------------------------------------------------
/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/src/main/scala/utils/WorkerStore.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by syh on 2016/12/9. A mutable per-worker key/value store: stashes arbitrary objects by string name for later typed retrieval.
3 | */
4 | package libble.utils
5 |
6 | import scala.collection.mutable.{Map => mutableMap}
7 |
8 | class WorkerStore() {
9 | val store = mutableMap[String, Any]()  // backing map; NOTE(review): unsynchronized — presumably accessed by a single thread per worker, confirm
10 |
11 | def get[T](key: String): T = {  // returns the value under key cast to T; throws NoSuchElementException if key is absent; cast is unchecked (erasure)
12 | store(key).asInstanceOf[T]
13 | }
14 |
15 | def put(key: String, value: Any) = {  // inserts or overwrites the value under key
16 | store += (key -> value)
17 | }
18 | }
--------------------------------------------------------------------------------
/.idea/libraries/Maven__oro_oro_2_0_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__junit_junit_4_12.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_tukaani_xz_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__log4j_log4j_1_2_17.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__xmlenc_xmlenc_0_52.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_sf_py4j_py4j_0_10_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_ivy_ivy_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_jpountz_lz4_lz4_1_3_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_1_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_inject_guice_3_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_razorvine_pyrolite_4_9.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_sf_opencsv_opencsv_2_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_io_commons_io_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_objenesis_objenesis_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__aopalliance_aopalliance_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_guava_guava_14_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_net_commons_net_2_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_netty_netty_3_8_0_Final.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_inject_javax_inject_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scalap_2_11_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_21.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_jpmml_pmml_model_1_2_15.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_ning_compress_lzf_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_twitter_chill_2_11_0_8_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_twitter_chill_java_0_8_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_lang_commons_lang_2_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_java_dev_jets3t_jets3t_0_7_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_ipc_1_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_janino_janino_2_7_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_jpmml_pmml_schema_1_2_15.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_esotericsoftware_minlog_1_3_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_github_fommil_netlib_core_1_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_codec_commons_codec_1_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_jul_to_slf4j_1_7_16.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spire_math_spire_2_11_0_7_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_github_rwl_jtransforms_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_antlr_antlr4_runtime_4_5_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scalanlp_breeze_2_11_0_11_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_slf4j_log4j12_1_7_21.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_sbt_test_interface_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spark_project_spark_unused_1_0_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_netty_netty_all_4_0_29_Final.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_ipc_tests_1_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_jcl_over_slf4j_1_7_16.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_clearspring_analytics_stream_2_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_novocode_junit_interface_0_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_auth_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_hdfs_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_zookeeper_zookeeper_3_4_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_hk2_api_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_javassist_javassist_3_18_1_GA.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_mortbay_jetty_jetty_util_6_1_26.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_ws_rs_javax_ws_rs_api_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_mesos_mesos_shaded_protobuf_0_21_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_json4s_json4s_ast_2_11_3_2_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scala_actors_2_11_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scalatest_scalatest_2_11_2_2_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scala_reflect_2_11_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_esotericsoftware_kryo_shaded_3_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_thoughtworks_paranamer_paranamer_2_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_mapred_hadoop2_1_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_json4s_json4s_core_2_11_3_2_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_jvm_3_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_client_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_common_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_sql_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_hk2_utils_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scala_compiler_2_11_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_xerial_snappy_snappy_java_1_1_2_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_3_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_roaringbitmap_RoaringBitmap_0_5_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_protobuf_protobuf_java_2_5_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_univocity_univocity_parsers_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_servlet_javax_servlet_api_3_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_curator_curator_client_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_column_1_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_common_1_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_hadoop_1_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_core_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_tags_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_xbean_xbean_asm5_shaded_4_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_api_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_mllib_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_hk2_locator_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spire_math_spire_macros_2_11_0_7_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_curator_curator_recipes_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_jackson_1_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_json4s_json4s_jackson_2_11_3_2_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scalanlp_breeze_macros_2_11_0_11_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_compress_1_4_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_encoding_1_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_graphx_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_sketch_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_unsafe_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_janino_commons_compiler_2_7_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_modules_scala_xml_2_11_1_0_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_core_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_annotation_javax_annotation_api_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_curator_curator_framework_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_generator_1_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_sourceforge_f2j_arpack_combined_all_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_annotations_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_client_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_common_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_catalyst_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_launcher_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_13.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_core_jersey_client_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_core_jersey_common_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_core_jersey_server_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_validation_validation_api_1_1_0_Final.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_streaming_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_13.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_osgi_resource_locator_1_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_mllib_local_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_external_javax_inject_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_media_jersey_media_jaxb_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_annotations_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_network_common_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_server_common_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_network_shuffle_2_11_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_format_2_3_0_incubating.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_app_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_bundles_repackaged_jersey_guava_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_core_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_common_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_module_jackson_module_paranamer_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_shuffle_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_external_aopalliance_repackaged_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_module_jackson_module_scala_2_11_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_modules_scala_parser_combinators_2_11_1_0_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_jobclient_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_containers_jersey_container_servlet_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_containers_jersey_container_servlet_core_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/src/test/scala/linalg/VectorsOpt.scala:
--------------------------------------------------------------------------------
1 | package libble.linalg
2 |
3 | import org.scalatest.FunSuite
4 |
5 | /**
6 | * Created by Aplysia_x on 2016/11/7.
7 | */
class VectorsOpt extends FunSuite {
  // Shared fixtures. Treat these as read-only inside tests: in-place
  // operations (plusax, scal) would otherwise couple tests to their
  // execution order.
  val sparse = new SparseVector(Array(0, 2), Array(1.0, 3.0), 3) // [1.0, 0.0, 3.0]
  val dense = new DenseVector(Array(1.0, 2.0, 3.0))
  import libble.linalg.implicits.vectorAdOps

  // Tolerance for floating-point comparisons.
  private val eps = 1e-12

  test("norm1") {
    assert(sparse.norm1() == 4)
    assert(dense.norm1() == 6)
  }

  test("norm2") {
    // Compare with a tolerance instead of exact == on doubles.
    assert(math.abs(sparse.norm2() - math.sqrt(10.0)) < eps)
    assert(math.abs(dense.norm2() - math.sqrt(14.0)) < eps)
  }

  test("dot") {
    // 1*1 + 0*2 + 3*3 = 10
    assert(sparse * dense == 10)
  }

  test("plusax") {
    // plusax mutates its receiver, so work on a fresh vector.
    val d = new DenseVector(Array(1.0, 2.0, 3.0))
    assert(d.plusax(1.0, sparse).norm1() == 10)
  }

  test("scal") {
    // scal mutates its receiver; use a fresh vector so this test no longer
    // depends on the shared `dense` having been modified by "plusax" first.
    val d = new DenseVector(Array(1.0, 2.0, 3.0))
    assert(d.scal(2.0).norm1() == 12)
  }
}
39 |
--------------------------------------------------------------------------------
/.idea/copyright/libble.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/src/main/scala/linalg/package.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * You may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 | package libble.linalg
16 |
17 | import scala.language.implicitConversions
18 | /**
19 | * Here define the implicit method for converting the Vector to VectorsOp.
20 | */
package object implicits {
  /** Implicitly wrap a [[Vector]] in a [[VectorsOp]] to enable operator syntax
    * (e.g. `v1 * v2`, `v.norm1()`).
    *
    * Explicit result type added: implicit definitions should never rely on
    * type inference (inferred implicit result types are fragile and
    * deprecated in later Scala versions).
    */
  implicit def vectorAdOps(vec: Vector): VectorsOp = new VectorsOp(vec)
}
24 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scala_library_2_11_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/src/main/scala/context/Instance.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 |
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.context
16 |
17 | import libble.linalg.Vector
18 |
19 | /**
20 | * This class is used to denote one term of the training or testing data, which consists of
21 | * one label and one Vector.
22 | * @param label
23 | * @param features
24 | */
// Case-class parameters are `val`s by default, so the explicit `val`
// modifiers were redundant and are dropped; the public accessors
// (`label`, `features`) are unchanged.
case class Instance(label: Double, features: Vector) {
  // Render as "(label, features)"; relies on the features Vector's toString.
  override def toString: String = s"($label, $features)"
}
28 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/src/main/scala/utils/XORShiftRandom.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Created by syh on 2016/12/9.
3 | */
4 | package libble.utils
5 |
6 | import java.nio.ByteBuffer
7 | import java.util.{Random => JavaRandom}
8 |
9 | import scala.util.hashing.MurmurHash3
10 |
11 | /**
12 | * This part of code is borrowed from Spark MLlib.
13 | */
/**
 * A fast XORShift pseudo-random generator (Marsaglia xorshift with shift
 * constants 21/35/4) exposed through the java.util.Random API: overriding
 * next(bits) is enough for nextInt, nextDouble, nextGaussian, nextLong, etc.
 *
 * NOTE(review): unlike java.util.Random, `seed` is updated without
 * synchronization — presumably each instance is confined to one thread;
 * confirm against usage sites.
 *
 * @param init initial seed; it is mixed via XORShiftRandom.hashSeed so that
 *             similar seeds (e.g. consecutive nanoTime values) diverge.
 */
class XORShiftRandom(init: Long) extends JavaRandom(init) {

  /** Zero-argument constructor: seeds from the current nano time. */
  def this() = this(System.nanoTime)

  // Internal 64-bit state. The superclass constructor calls setSeed(init)
  // first (hitting the override below), but this field initializer runs
  // afterwards and takes precedence — both end up at hashSeed(init).
  private var seed = XORShiftRandom.hashSeed(init)

  // we need to just override next - this will be called by nextInt, nextDouble,
  // nextGaussian, nextLong, etc.
  override protected def next(bits: Int): Int = {
    // One xorshift step: three shift/xor rounds advance the 64-bit state.
    var nextSeed = seed ^ (seed << 21)
    nextSeed ^= (nextSeed >>> 35)
    nextSeed ^= (nextSeed << 4)
    seed = nextSeed
    // Return the low `bits` bits, as the java.util.Random contract requires.
    (nextSeed & ((1L << bits) -1)).asInstanceOf[Int]
  }

  /** Reset the state; the new seed is hashed exactly like the constructor's. */
  override def setSeed(s: Long) {
    seed = XORShiftRandom.hashSeed(s)
  }
}
34 |
object XORShiftRandom {
  /** Hash seeds to have 0/1 bits throughout.
    *
    * Runs all 8 bytes of the seed through MurmurHash3 so that similar seeds
    * produce well-distributed initial XORShift states. (MurmurHash3.bytesHash
    * returns an Int, so only 32 bits of state are produced here.)
    */
  private def hashSeed(seed: Long): Long = {
    // Fix: allocate java.lang.Long.BYTES (8) bytes. The previous
    // java.lang.Long.SIZE (64 — a size in *bits*) allocated a 64-byte
    // buffer, so 56 trailing zero bytes were hashed along with the seed.
    val bytes = ByteBuffer.allocate(java.lang.Long.BYTES).putLong(seed).array()
    MurmurHash3.bytesHash(bytes)
  }
}
--------------------------------------------------------------------------------
/src/main/scala/examples/LoadFile.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * You may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 | package libble.examples
16 |
17 | import org.apache.spark.{SparkConf, SparkContext}
18 |
19 | /***
20 | * Here we test the function of loadlibSVMFile and loadLIBBLEFile.
21 | */
object LoadFile {
  /** Round-trip check: load a LIBBLE-format file, print its record count,
    * and write it back out under a new name.
    */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("myTest")
    val context = new SparkContext(sparkConf)

    // Brings loadLIBBLEFile / saveAsLIBBLEFile into scope as extensions.
    import libble.context.implicits._

    val training = context.loadLIBBLEFile("sparse.data")
    println(training.count())
    training.saveAsLIBBLEFile("this.data")
  }
}
35 |
--------------------------------------------------------------------------------
/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/src/main/scala/regression/Lasso.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * You may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 | package libble.regression
16 |
17 | import libble.generalizedLinear._
18 |
19 | /**
20 | * This class is the model of LinearRegression with default regularization L1Reg.
21 | *
22 | * @param stepSize
23 | * @param regParam
24 | * @param factor
25 | * @param iters
26 | * @param partsNum
27 | */
class Lasso(stepSize: Double,
            regParam: Double,
            factor: Double,
            iters: Int,
            partsNum: Int)
  extends LinearScope(stepSize, regParam, factor, iters, partsNum) {

  /** Default configuration: step size 1.0, regParam 1e-4, factor 1e-4,
    * 5 iterations, partsNum -1 (presumably "use default partitioning" —
    * confirm against LinearScope).
    */
  def this() = this(1.0, 0.0001, 0.0001, 5, -1)

  // Lasso = least-squares loss with L1 regularization.
  setLossFunc(new LeastSquareLoss())
  setUpdater(new L1Updater())

  // Regression model, so the classification threshold is disabled.
  clearThreshold
}
45 |
--------------------------------------------------------------------------------
/src/main/scala/collaborativeFiltering/MatrixFactorizationModel.scala:
--------------------------------------------------------------------------------
1 | package libble.collaborativeFiltering
2 |
3 | import libble.linalg.Vector
4 | import libble.linalg.implicits._
5 | import org.apache.spark.rdd.RDD
6 |
class MatrixFactorizationModel (rank: Int,
                                userFactors: RDD[(Int, Vector)],
                                itemFactors: RDD[(Int, Vector)]) extends Serializable{
  // NOTE(review): `rank` is stored but never read in this class — confirm it
  // is needed (e.g. for serialization or callers elsewhere).

  /**
   * Predicts the rating for a single (user, item) pair by taking the dot
   * product of the two latent-factor vectors.  Uses RDD.lookup, so each call
   * launches Spark jobs — prefer the batched overload for many pairs.
   */
  def predict (userIndex: Int, itemIndex: Int) : Double = {
    val userVec = userFactors.lookup(userIndex).head
    val itemVec = itemFactors.lookup(itemIndex).head
    userVec * itemVec
  }

  /**
   * Predicts ratings for a batch of (user, item) index pairs.
   * Joins against the factor RDD whose key side is smaller first, to keep
   * the intermediate join narrow.
   */
  def predict (indices: RDD[(Int, Int)]): RDD[Rating] = {
    val distinctUsers = indices.keys.distinct().count()
    val distinctItems = indices.values.distinct().count()
    if (distinctUsers > distinctItems) {
      // Fewer distinct items than users: key the request by item first.
      itemFactors
        .join(indices.map(_.swap))
        .map { case (item, (iFac, user)) => (user, (item, iFac)) }
        .join(userFactors)
        .map { case (user, ((item, iFac), uFac)) => new Rating(iFac * uFac, user, item) }
    } else {
      // Fewer (or equal) distinct users: key the request by user first.
      userFactors
        .join(indices)
        .map { case (user, (uFac, item)) => (item, (user, uFac)) }
        .join(itemFactors)
        .map { case (item, ((user, uFac), iFac)) => new Rating(iFac * uFac, user, item) }
    }
  }
}
--------------------------------------------------------------------------------
/src/main/scala/classification/SVM.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.classification
16 |
17 | import libble.generalizedLinear.{HingeLoss, L2Updater, LinearScope}
18 |
19 | /**
20 | * This class is the model of SVM with default regularization L2Reg.
21 | *
22 | * @param stepSize
23 | * @param regParam
24 | * @param factor
25 | * @param iters
26 | * @param partsNum
27 | */
class SVM(stepSize: Double,
          regParam: Double,
          factor: Double,
          iters: Int,
          partsNum: Int) extends LinearScope(stepSize, regParam, factor, iters, partsNum) {

  /** Default hyper-parameters: stepSize=1.0, regParam=1e-4, factor=1e-4, 5 iterations, partsNum=-1. */
  def this() = this(1.0, 1e-4, 1e-4, 5, -1)

  // Hinge loss with an L2 penalty yields a linear SVM.
  setLossFunc(new HingeLoss)
  setUpdater(new L2Updater)

  // Classification: scores are cut at 0.0 to produce the predicted label.
  setThreshold(0.0)

}
44 |
--------------------------------------------------------------------------------
/src/main/scala/examples/testScaller.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * You may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 | package libble.examples
16 |
17 | import libble.features.Scaller
18 | import org.apache.spark.{SparkConf, SparkContext}
19 |
20 | /**
21 | * This is an example of using Scaller.
22 | */
object testScaller {
  /**
   * Example driver for [[Scaller]].
   *
   * Usage: testScaller [inputPath]
   * The input path used to be hard-coded to "sparse.data"; it can now be
   * supplied as the first command-line argument (the old default is kept,
   * so existing invocations behave identically).
   */
  def main(args: Array[String]) {

    val conf = new SparkConf()
      .setAppName("myTest")
    val sc = new SparkContext(conf)

    // Generalization: allow the data file to be passed on the command line.
    val inputPath = if (args.length > 0) args(0) else "sparse.data"

    import libble.context.implicits.sc2LibContext
    val training = sc.loadLIBBLEFile(inputPath)

    // Scaller(true, true): presumably enables both centering and scaling —
    // TODO confirm the meaning of the two flags against the Scaller class.
    val scaller = new Scaller(true, true)
    val features = training.map(_.features)
    scaller.computeFactor(features)

    // getCenter/getStd return Options populated by computeFactor above.
    println("center:" + scaller.getCenter.get)
    println("std:" + scaller.getStd.get)
    val result = scaller.transform(features).collect()
    println(result.mkString(", "))

  }

}
46 |
--------------------------------------------------------------------------------
/src/main/scala/regression/LinearRegression.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.regression
16 |
17 | import libble.generalizedLinear.{ L2Updater, LeastSquareLoss, LinearScope}
18 |
19 | /**
20 | * This is the model of LinearRegression with default regularization L1Reg.
21 | *
22 | * @param stepSize
23 | * @param regParam
24 | * @param factor
25 | * @param iters
26 | * @param partsNum
27 | */
28 | class LinearRegression(stepSize: Double,
29 | regParam: Double,
30 | factor: Double,
31 | iters: Int,
32 | partsNum: Int) extends LinearScope(stepSize, regParam, factor, iters, partsNum) {
33 | def this() = this(1.0, 0.0001, 0.0001, 5, -1)
34 |
35 | setLossFunc(new LeastSquareLoss)
36 | setUpdater(new L2Updater())
37 |
38 | /**
39 | * Set the output to be the predict value.
40 | */
41 | clearThreshold
42 |
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/scala/classification/LogisticRegression.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 |
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.classification
16 |
17 | import libble.generalizedLinear.{L2Updater, LinearScope, LogisticLoss}
18 |
19 | /**
20 | * This class is the model of LogisticRegression with default regularization L2Reg.
21 | *
22 | * @param stepSize
23 | * @param regParam
24 | * @param factor
25 | * @param iters
26 | * @param partsNum
27 | */
28 | class LogisticRegression(stepSize: Double,
29 | regParam: Double,
30 | factor: Double,
31 | iters: Int,
32 | partsNum: Int) extends LinearScope(stepSize, regParam, factor, iters, partsNum) {
33 | def this() = this(1.0, 0.0001, 0.0001, 5, -1)
34 |
35 | setLossFunc(new LogisticLoss())
36 | setUpdater(new L2Updater())
37 |
38 |
39 | /**
40 | * Default threshold is 0.5.
41 | */
42 | setThreshold(0.5)
43 |
44 | /**
45 | * Set the classNum
46 | *
47 | * @param classNum
48 | * @return this
49 | */
50 | override def setClassNum(classNum: Int): LogisticRegression.this.type ={
51 | super.setClassNum(classNum)
52 | setLossFunc(new LogisticLoss(classNum))
53 |
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/scala/examples/testKMeans.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * You may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package libble.examples
17 |
18 | import libble.clustering.KMeans
19 | import org.apache.log4j.{Level, Logger}
20 | import org.apache.spark.{SparkConf, SparkContext}
21 |
22 | import scala.collection.mutable
23 |
24 | /**
25 | * Created by Aplysia_x on 2016/12/9.
26 | */
object testKMeans {
  /**
   * Example driver for KMeans.
   * Expected arguments: path [--k=Int] [--maxIters=Int] [--stopBound=Double]
   */
  def main(args: Array[String]) {

    if (args.length < 1) {
      System.err.println("Usage: ~ path:String --k=Int --maxIters=Int --stopBound=Double")
      System.exit(1)
    }

    // Turn every "--key=value" argument after the data path into a pair.
    val parsedOptions = args.drop(1).map { raw =>
      raw.dropWhile(_ == '-').split('=') match {
        case Array(key, value) => key -> value
        case _ => throw new IllegalArgumentException("Invalid argument: " + raw)
      }
    }
    val options = mutable.Map(parsedOptions: _*)

    // Quieten Spark's own logging so the example output stays readable.
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val conf = new SparkConf()
      .setAppName("My Test Kmeans")
    val sc = new SparkContext(conf)

    // Hyper-parameters, with the same defaults as before.
    val k = options.remove("k").map(_.toInt).getOrElse(10)
    val maxIters = options.remove("maxIters").map(_.toInt).getOrElse(10)
    val stopBound = options.remove("stopBound").map(_.toDouble).getOrElse(0.0001)

    import libble.context.implicits.sc2LibContext
    val training = sc.loadLIBBLEFile(args(0))
    val model = new KMeans(k, maxIters, stopBound)
    val data = training.map(instance => (instance.label, instance.features))
    model.train(data)
  }
}
--------------------------------------------------------------------------------
/src/main/scala/dimReduction/SVD.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * You may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 |
16 | package libble.dimReduction
17 |
18 | import java.util.Calendar
19 |
20 | import libble.linalg.Vector
21 | import libble.linalg.implicits._
22 | import org.apache.spark.rdd.RDD
23 |
24 | import scala.collection.mutable.ArrayBuffer
25 |
26 |
27 | /**
28 | * This is the model of SVD
29 | *
30 | * @param K
31 | * @param bound
32 | * @param stepSize
33 | * @param iteration
34 | * @param parts
35 | * @param batchSize
36 | */
37 |
class SVD(var K: Int,
          var bound: Double,
          var stepSize: Double,
          var iteration: Int,
          var parts: Int,
          var batchSize: Int) extends Serializable {
  // Results of the most recent call to train().
  var eigenvalues = new ArrayBuffer[Double]()
  var eigenvectors = new ArrayBuffer[Vector]()


  /**
   * Computes the top-K singular values and right singular vectors of the
   * data matrix whose rows are the input vectors.
   *
   * @param training rows of the data matrix
   * @return (singular values, right singular vectors), largest first
   */
  def train(training: RDD[Vector]): (Array[Double], Array[Vector]) = {
    // BUGFIX: reset accumulated state so a second call to train() does not
    // append duplicate/stale entries to the result buffers.
    eigenvalues.clear()
    eigenvectors.clear()

    val st = Calendar.getInstance().getTimeInMillis
    val m = new GLS_Matrix_Batch(stepSize, 0.0, 0.0, iteration, parts, batchSize, K)
    m.setStopBound(bound)
    val model = m.train(training)

    // The k-th singular value is sqrt(sum_i (x_i . v_k)^2), where v_k is the
    // k-th right singular vector produced by the solver.
    for (k <- 0 until K) {
      val v = model._1(k)
      val lambda = training.map(x => Math.pow(x * v, 2)).reduce(_ + _)
      eigenvalues.append(math.sqrt(lambda))
      eigenvectors.append(v)
    }

    println(s"time to calculate the top ${K} eigen is: " + (Calendar.getInstance().getTimeInMillis - st))
    (eigenvalues.toArray, eigenvectors.toArray)

  }

}
79 |
--------------------------------------------------------------------------------
/src/main/scala/examples/testLR.scala:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
4 | * All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * You may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | */
17 | package libble.examples
18 |
19 | import libble.classification.LogisticRegression
20 | import org.apache.log4j.{Level, Logger}
21 | import org.apache.spark.{SparkConf, SparkContext}
22 |
23 | import scala.collection.mutable
24 |
25 | /** *
26 | * Here is the example of using LogisticRegression.
27 | */
object testLR {
  /**
   * Example driver for LogisticRegression.
   * First argument is the training-file path; the rest are "--key=value"
   * options: stepSize, regParam, numIters, elasticF, numPart, numClasses.
   */
  def main(args: Array[String]) {

    if (args.length < 1) {
      // BUGFIX: the usage string used to advertise "--nuPart" although the
      // option is parsed under the key "numPart" below.
      System.err.println("Usage: ~ path:String --elasticF=Double --numIters=Int --stepSize=Double --regParam=Double --numPart=Int --numClasses=Int")
      System.exit(1)
    }

    // Parse "--key=value" options following the path argument.
    val optionsList = args.drop(1).map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }
    val options = mutable.Map(optionsList: _*)
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val conf = new SparkConf()
      .setAppName("myTest")
    val sc = new SparkContext(conf)

    // Hyper-parameters (defaults preserved).
    val stepSize = options.remove("stepSize").map(_.toDouble).getOrElse(1.0)
    val regParam = options.remove("regParam").map(_.toDouble).getOrElse(0.00001)
    val numIter = options.remove("numIters").map(_.toInt).getOrElse(5)
    val elasticF = options.remove("elasticF").map(_.toDouble).getOrElse(0.00001)
    val numPart = options.remove("numPart").map(_.toInt).getOrElse(20)
    // NOTE(review): numClasses is parsed but never passed to the model
    // (e.g. via setClassNum) — confirm whether that is intended.
    val numClasses = options.remove("numClasses").map(_.toInt).getOrElse(2)

    import libble.context.implicits.sc2LibContext
    val training = sc.loadLIBBLEFile(args(0), numPart)
    val m = new LogisticRegression(stepSize, regParam, elasticF, numIter, numPart)
    m.train(training)

  }
}
64 |
--------------------------------------------------------------------------------
/src/main/scala/examples/testSVD.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 |
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.examples
16 |
17 | import libble.dimReduction.SVD
18 | import org.apache.log4j.{Level, Logger}
19 | import org.apache.spark.{SparkConf, SparkContext}
20 |
21 | import scala.collection.mutable
22 |
23 | /**
24 | * This is an example of using SVD.
25 | */
object testSVD {
  /**
   * Example driver for SVD.
   * First argument is the data path; the rest are "--key=value" options:
   * stepSize, numIters, numPart, k, bound, batchSize (all with defaults).
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)
    System.setProperty("spark.ui.port", "4042")
    System.setProperty("spark.akka.frameSize", "100")

    val conf = new SparkConf().setAppName("testSVD")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    conf.set("spark.kryoserializer.buffer.max", "2000m")
    val sc = new SparkContext(conf)

    // BUGFIX: the old check required 5 arguments and printed an option list
    // copied from the LogisticRegression example that did not match this
    // program.  Only the data path is mandatory; every option has a default.
    if (args.length < 1) {
      System.err.println("Usage: ~ path:String --stepSize=Double --numIters=Int --numPart=Int --k=Int --bound=Double --batchSize=Int")
      System.exit(1)
    }
    val optionsList = args.drop(1).map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }
    val options = mutable.Map(optionsList: _*)

    val stepSize = options.remove("stepSize").map(_.toDouble).getOrElse(0.1)
    val numIters = options.remove("numIters").map(_.toInt).getOrElse(10)
    val numPart = options.remove("numPart").map(_.toInt).getOrElse(2)
    val K = options.remove("k").map(_.toInt).getOrElse(1)
    val bound = options.remove("bound").map(_.toDouble).getOrElse(1e-6)
    val batchSize = options.remove("batchSize").map(_.toInt).getOrElse(100)

    import libble.context.implicits._
    val training = sc.loadLIBBLEFile(args(0)).map(_.features)

    // Train SVD and print the singular values and right singular vectors.
    val mysvd = new SVD(K, bound, stepSize, numIters, numPart, batchSize)

    val SVDModel = mysvd.train(training)

    val sigma = SVDModel._1
    val v = SVDModel._2

    sigma.foreach(x=>print(x+","))
    v.foreach(x=>println(x))
  }
}
74 |
--------------------------------------------------------------------------------
/src/main/scala/examples/testPCA.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 |
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.examples
16 |
17 | import libble.dimReduction.PCA
18 | import org.apache.log4j.{Level, Logger}
19 | import org.apache.spark.{SparkContext, SparkConf}
20 |
21 | import scala.collection.mutable
22 |
23 | /**
24 | * This is an example of using PCA.
25 | */
object testPCA {
  /**
   * Example driver for PCA.
   * First argument is the data path; the rest are "--key=value" options:
   * stepSize, numIters, numPart, k, bound, batchSize (all with defaults).
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)
    System.setProperty("spark.ui.port", "4042")
    System.setProperty("spark.akka.frameSize", "100")

    val conf = new SparkConf().setAppName("testPCA")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    conf.set("spark.kryoserializer.buffer.max", "2000m")
    val sc = new SparkContext(conf)

    // BUGFIX: the old check required 5 arguments and printed an option list
    // copied from the LogisticRegression example that did not match this
    // program.  Only the data path is mandatory; every option has a default.
    if (args.length < 1) {
      System.err.println("Usage: ~ path:String --stepSize=Double --numIters=Int --numPart=Int --k=Int --bound=Double --batchSize=Int")
      System.exit(1)
    }
    val optionsList = args.drop(1).map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }
    val options = mutable.Map(optionsList: _*)

    val stepSize = options.remove("stepSize").map(_.toDouble).getOrElse(0.1)
    val numIters = options.remove("numIters").map(_.toInt).getOrElse(10)
    val numPart = options.remove("numPart").map(_.toInt).getOrElse(2)
    val K = options.remove("k").map(_.toInt).getOrElse(1)
    val bound = options.remove("bound").map(_.toDouble).getOrElse(1e-6)
    val batchSize = options.remove("batchSize").map(_.toInt).getOrElse(100)

    import libble.context.implicits._
    val training = sc.loadLIBBLEFile(args(0)).map(_.features)

    // Train PCA, then print the top-K principal components and the data
    // projected onto them.
    val mypca = new PCA(K, bound, stepSize, numIters, numPart, batchSize)
    val PCAModel = mypca.train(training)

    val pc = PCAModel._2
    pc.foreach(x => println(x))
    val projected = mypca.transform(training, pc)
    projected.collect().foreach(x => println(x))

  }
}
73 |
--------------------------------------------------------------------------------
/src/main/scala/examples/testCF.scala:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
4 | * All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * You may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | */
17 |
18 | /**
19 | * We licence this file to you under the Apache Licence 2.0; you could get a copy
20 | * of the licence from http://www.apache.org/licenses/LICENSE-2.0.
21 | */
22 | package libble.examples
23 |
24 | import libble.collaborativeFiltering.{ MatrixFactorizationByScope, MatrixFactorization, Rating}
25 | import org.apache.log4j.{Level, Logger}
26 | import org.apache.spark.{SparkConf, SparkContext}
27 |
28 | import scala.collection.mutable
29 |
30 |
31 | /***
32 | * Here is the example of using Matrix Factorization.
33 | */
object testCF {
  /**
   * Example driver for matrix factorization.
   * Options: trainset, stepsize, regParam_u, regParam_v, numIters, numParts,
   * rank, testset, stepsize2, ifPrintLoss.
   */
  def main(args: Array[String]) {
    val optionsList = args.map { arg =>
      arg.dropWhile(_ == '-').split('=') match {
        case Array(opt, v) => (opt -> v)
        case _ => throw new IllegalArgumentException("Invalid argument: " + arg)
      }
    }
    val options = mutable.Map(optionsList: _*)
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

    val conf = new SparkConf()
      .setAppName("testMF")
    val sc = new SparkContext(conf)

    val trainsetPath = options.remove("trainset").map(_.toString).getOrElse("data\\testMF.txt")
    val stepsize = options.remove("stepsize").map(_.toDouble).getOrElse(0.01)
    val regParam_u = options.remove("regParam_u").map(_.toDouble).getOrElse(0.05)
    // BUGFIX: this used to read "regParam_u" a second time (already removed
    // above), so a user-supplied --regParam_v was silently ignored and the
    // default 0.05 was always used.
    val regParam_v = options.remove("regParam_v").map(_.toDouble).getOrElse(0.05)
    val numIters = options.remove("numIters").map(_.toInt).getOrElse(50)
    val numParts = options.remove("numParts").map(_.toInt).getOrElse(16)
    val rank = options.remove("rank").map(_.toInt).getOrElse(40)
    val testsetPath = options.remove("testset").map(_.toString)
    // NOTE(review): stepsize2 is parsed but unused here — presumably meant
    // for MatrixFactorizationByScope (imported above); confirm.
    val stepsize2 = options.remove("stepsize2").map(_.toDouble).getOrElse(0.1)
    val ifPrintLoss = options.remove("ifPrintLoss").map(_.toInt).getOrElse(0)

    // Each input line is "user,item,rating".
    val trainSet = sc.textFile(trainsetPath, numParts)
      .map(_.split(',') match { case Array(user, item, rate) =>
        Rating(rate.toDouble, user.toInt, item.toInt)
      })

    val model = new MatrixFactorization()
      .train(trainSet,
        numIters,
        numParts,
        rank,
        regParam_u,
        regParam_v,
        stepsize,
        ifPrintLoss)

    // Optionally evaluate RMSE on a held-out test set.
    if(testsetPath.isDefined) {
      val testSet = sc.textFile(testsetPath.get, numParts)
        .map(_.split(',') match { case Array(user, item, rate) =>
          Rating(rate.toDouble, user.toInt, item.toInt)
        })

      val result = model.predict(testSet.map(r => (r.index_x, r.index_y)))
      val joinRDD = result.map(r => ((r.index_x, r.index_y), r.rating))
        .join(testSet.map(r => ((r.index_x, r.index_y), r.rating)))

      val rmse = joinRDD.values
        .map(i => math.pow(i._1 - i._2, 2))
        .mean()
      println(s"rmse of test set: ${math.sqrt(rmse)}")
    }
  }
}
95 |
--------------------------------------------------------------------------------
/src/main/scala/generalizedLinear/Regularizer.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * We licence this file to you under the Apache Licence 2.0; you could get a copy
3 | * of the licence from http://www.apache.org/licenses/LICENSE-2.0.
4 | */
5 | package libble.generalizedLinear
6 |
7 | import libble.linalg.Vector
8 | import libble.linalg.implicits._
9 |
10 | /**
11 | *
12 | */
abstract class Updater extends Serializable {
  /**
   * Updates the weights in place with one regularized gradient step.
   * Concrete implementations in this file take a descent step,
   * weightNew = weightOld - stepSize * gradient, and then apply their
   * regularizer's adjustment scaled by regParam (the earlier comment's
   * "+ stepSize" did not match the implementations).
   *
   * @param weights  current weight vector, modified in place
   * @param gradient gradient of the loss at the current weights
   * @param stepSize learning rate
   * @param regParam regularization coefficient
   */
  def update(weights: Vector, gradient: Vector, stepSize: Double, regParam: Double): Unit

  /**
   * Returns the cost contributed by the regularization term.
   *
   * @param weight   weight vector
   * @param regParam regularization coefficient
   * @return regCost value of the regularization term
   */
  def getRegVal(weight: Vector, regParam: Double): Double

}
36 |
37 | /**
38 | *
39 | */
class simpleUpdater extends Updater {
  /**
   * Plain gradient-descent step with no regularization:
   * weights <- weights - stepSize * gradient (in place).
   *
   * @param weights  weight vector, mutated in place
   * @param gradient loss gradient at the current weights
   * @param stepSize learning rate
   * @param regParam ignored by this updater
   */
  override def update(weights: Vector, gradient: Vector, stepSize: Double, regParam: Double): Unit =
    weights.plusax(-stepSize, gradient)

  /**
   * No regularization term, so its cost is always zero.
   *
   * @param weight   weight vector (unused)
   * @param regParam regularization coefficient (unused)
   * @return 0.0
   */
  override def getRegVal(weight: Vector, regParam: Double): Double = 0.0
}
66 |
67 | /**
68 | *
69 | */
class L1Updater extends Updater {
  /**
   * Gradient step followed by the L1 proximal (soft-thresholding) operator:
   *
   *   w_i <- sign(w_i) * max(0, |w_i| - stepSize * regParam)
   *
   * BUGFIX: the previous code computed
   * sign(w_i) * max(0, |w_i - stepSize*regParam|), where the max(0, .) was a
   * no-op (abs is never negative), so weights were never clipped to zero and
   * small weights inside the shrinkage band came out with the wrong value.
   *
   * @param weights  weight vector, mutated in place
   * @param gradient loss gradient at the current weights
   * @param stepSize learning rate
   * @param regParam L1 regularization coefficient
   */
  override def update(weights: Vector, gradient: Vector, stepSize: Double, regParam: Double): Unit = {
    weights.plusax(-stepSize, gradient)
    val shrinkage = regParam * stepSize
    // Assumes toArray exposes the backing array so in-place writes take
    // effect — same assumption as the original code; confirm in Vector.
    val values = weights.toArray
    var i = 0
    while (i < weights.size) {
      values(i) = math.signum(values(i)) * math.max(0.0, math.abs(values(i)) - shrinkage)
      i += 1
    }
  }

  /**
   * L1 regularization cost: regParam * ||w||_1.
   *
   * @param weight   weight vector
   * @param regParam L1 regularization coefficient
   * @return regCost
   */
  override def getRegVal(weight: Vector, regParam: Double): Double = {
    weight.norm1 * regParam
  }
}
103 |
104 | /**
105 | *
106 | */
class L2Updater extends Updater {
  /**
   * Gradient step with L2 (ridge) regularization.  The weights are first
   * shrunk by (1 - stepSize*regParam) — the gradient of the L2 penalty —
   * and then the descent step is applied in place.
   *
   * @param weights  weight vector, mutated in place
   * @param gradient loss gradient at the current weights
   * @param stepSize learning rate
   * @param regParam L2 regularization coefficient
   */
  override def update(weights: Vector, gradient: Vector, stepSize: Double, regParam: Double): Unit = {
    // Order matters: decay the weights before taking the gradient step.
    val decay = 1 - stepSize * regParam
    weights *= decay
    weights.plusax(-stepSize, gradient)
  }

  /**
   * L2 regularization cost: 0.5 * regParam * ||w||_2^2.
   *
   * @param weight   weight vector
   * @param regParam L2 regularization coefficient
   * @return regCost
   */
  override def getRegVal(weight: Vector, regParam: Double): Double = {
    val l2norm = weight.norm2
    0.5 * regParam * l2norm * l2norm
  }
}
135 |
136 |
--------------------------------------------------------------------------------
/src/main/scala/dimReduction/PCA.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.dimReduction
16 |
17 | import java.util.Calendar
18 |
19 | import libble.linalg.implicits._
20 | import libble.linalg.{DenseVector, Vector}
21 | import org.apache.spark.rdd.RDD
22 |
23 | import scala.collection.mutable.ArrayBuffer
24 |
/**
 * This class is the model of PCA (principal component analysis).
 *
 * @param K         number of principal components to extract (K >= 1)
 * @param bound     convergence bound passed to the underlying optimizer
 * @param stepSize  step size of the underlying optimizer
 * @param iteration maximum number of outer iterations
 * @param parts     number of partitions to use (-1 keeps the input partitioning)
 * @param batchSize mini-batch size of the underlying optimizer
 */
class PCA(var K: Int,
          var bound: Double,
          var stepSize: Double,
          var iteration: Int,
          var parts: Int,
          var batchSize: Int) extends Serializable {
  require(K >= 1, s"K is the number of principal components, it should be that K >= 1 but was given $K")

  var eigenvalues = new ArrayBuffer[Double]()
  var eigenvectors = new ArrayBuffer[Vector]()


  /**
   * This method generates the K principal components and their
   * corresponding eigenvalues.
   *
   * @param training training data, one feature vector per record
   * @return (eigenvalues, eigenvectors), both of length K
   */
  def train(training: RDD[Vector]): (Array[Double], Array[Vector]) = {
    val dims = training.first().size
    require(K <= dims, s"data dimension size is $dims, it must be greater than K=$K")

    // The centered data is scanned once per component below, so cache it.
    val centerData = centralize(training).cache()
    // Hoist the count out of the loop: the original recomputed a full
    // count() pass for every one of the K components.
    val count = centerData.count()

    val st = Calendar.getInstance().getTimeInMillis
    val m = new GLS_Matrix_Batch(stepSize, 0.0, 0.0, iteration, parts, batchSize, K)
    m.setStopBound(bound)
    val model = m.train(centerData)

    // Reset the result buffers so calling train() a second time does not
    // accumulate stale components from a previous run.
    eigenvalues.clear()
    eigenvectors.clear()

    /**
     * v is the k-th principal component.
     * lambda is the k-th largest eigenvalue corresponding to v, estimated
     * as the variance of the centered data projected onto v.
     */
    for (k <- 0 until K) {
      val v = model._1(k)
      val lambda = (1.0 / (count - 1)) * centerData.map(x => Math.pow(x * v, 2)).reduce(_ + _)
      eigenvalues.append(lambda)
      eigenvectors.append(v)
    }

    println(s"time to calculate the top ${K} eigen is: " + (Calendar.getInstance().getTimeInMillis - st))
    (eigenvalues.toArray, eigenvectors.toArray)
  }

  /**
   * This method centralizes raw data (subtracts the feature-wise mean),
   * which is the first step of PCA.
   *
   * @param data raw feature vectors
   * @return centered vectors
   */
  def centralize(data: RDD[Vector]): RDD[Vector] = {
    val count = data.count()
    val numF = data.first().size
    val average = data.treeAggregate(new DenseVector(numF))(
      seqOp = (c, v) => {
        c += v
        c
      }, combOp = (c1, c2) => {
        c2 += c1
        c2
      }
    )
    average /= count
    val aver = data.context.broadcast(average)

    data.map { e =>
      // Densify first so subtraction mutates a private copy, not the input.
      val newFeatures = new DenseVector(e.toArray)
      newFeatures -= aver.value
      newFeatures.vector
    }
  }


  /**
   * This method projects raw data to the new feature space spanned by the
   * principal components.
   *
   * @param rawData raw feature vectors
   * @param pc      principal components
   * @return projected vectors of dimension pc.length
   */
  def transform(rawData: RDD[Vector], pc: Array[Vector]): RDD[Vector] = {
    rawData.map { ins =>
      new DenseVector(pc.map(ins * _)).vector
    }
  }

}
136 |
--------------------------------------------------------------------------------
/src/main/scala/context/implicits.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * You may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | * See the License for the specific language governing permissions and
13 | * limitations under the License.
14 | */
15 | package libble.context
16 |
17 | import libble.linalg.{DenseVector, SparseVector}
18 | import org.apache.spark.SparkContext
19 | import org.apache.spark.rdd.RDD
20 |
21 | import scala.language.implicitConversions
/**
 * Here we define the implicit convert functions that enrich SparkContext
 * and RDD[Instance] with LIBBLE load/save helpers.
 */
object implicits {
  // Explicit result types: implicit definitions with inferred types are
  // fragile and produce compiler warnings in recent Scala versions.

  /** Enrich a SparkContext with LIBBLE file-loading methods. */
  implicit def sc2LibContext(sc: SparkContext): LibContext = new LibContext(sc)

  /** Enrich an RDD[Instance] with LIBBLE file-saving methods. */
  implicit def RDD2LIBBLERDD(data: RDD[Instance]): LIBBLERDD = new LIBBLERDD(data)
}
30 |
/**
 * This class includes the methods of loading LIBBLEFILE from the file system.
 *
 * @param sc the SparkContext used to read files
 */
class LibContext(val sc: SparkContext) {
  /**
   * Load LibSVM file from the File System with default parallelization.
   *
   * @param path input path
   * @return RDD[Instance]
   * @deprecated replaced by function loadLIBBLEFile
   */
  def loadLibSVMFile(path: String): RDD[Instance] = {
    loadLibSVMFile(path, -1)
  }

  /**
   * Load LibSVM file from the File System with given parallelization.
   *
   * @param path     input path
   * @param partsNum number of partitions; non-positive uses Spark's default
   * @return RDD[Instance]
   * @deprecated replaced by function loadLIBBLEFile
   */
  def loadLibSVMFile(path: String, partsNum: Int): RDD[Instance] = {
    // Lines holding only a label (no index:value pairs) are skipped,
    // preserving the original LibSVM loader's behavior.
    parseSparse(readLines(path, partsNum).filter(_.split(" ").length != 1))
  }

  /**
   * Load LIBBLE file from File System with default parallelization.
   * Compatible with LibSVM file.
   *
   * @param path input path
   * @return RDD[Instance]
   */
  def loadLIBBLEFile(path: String): RDD[Instance] = {
    loadLIBBLEFile(path, -1)
  }

  /**
   * Load LIBBLE file from File System with given parallelization.
   * Compatible with LibSVM file: a ':' in the first line selects the sparse
   * "label idx:value ..." format, otherwise dense space-separated values.
   *
   * @param path     input path
   * @param partsNum number of partitions; non-positive uses Spark's default
   * @return RDD[Instance]
   */
  def loadLIBBLEFile(path: String, partsNum: Int): RDD[Instance] = {
    val lines = readLines(path, partsNum)
    if (lines.first().contains(":")) {
      parseSparse(lines)
    } else {
      lines.map { line =>
        val items = line.split(' ')
        new Instance(items.head.toDouble, new DenseVector(items.drop(1).map(_.toDouble)))
      }
    }
  }

  /** Read `path` as trimmed lines, dropping blank lines and '#' comments. */
  private def readLines(path: String, partsNum: Int): RDD[String] = {
    val raw = if (partsNum > 0) sc.textFile(path, partsNum) else sc.textFile(path)
    raw.map(_.trim)
      .filter(line => !(line.isEmpty || line.startsWith("#")))
  }

  /**
   * Parse "label idx:value ..." lines into sparse Instances.
   * File indices are 1-based and shifted to 0-based here. The dimension is
   * inferred as (largest last-per-line index) + 1 — this assumes indices in
   * each line are ascending, as in standard LibSVM files.
   */
  private def parseSparse(lines: RDD[String]): RDD[Instance] = {
    val terms = lines.map { line =>
      val items = line.split(' ')
      val label = items.head.toDouble
      val (indices, values) = items.tail.filter(_.nonEmpty).map { item =>
        val kv = item.split(':')
        (kv.head.toInt - 1, kv.last.toDouble)
      }.unzip
      (label, indices, values)
    }.cache() // cached: scanned once for the dimension and once for the result
    val d = terms.map(_._2.lastOption.getOrElse(0)).reduce(math.max) + 1
    terms.map { case (label, indices, values) =>
      new Instance(label, new SparseVector(indices.toArray, values.toArray, d))
    }
  }


}
131 |
132 |
/**
 * With this class, we add save-data methods to the RDD[Instance].
 *
 * Formatting closures are kept inline on purpose: this wrapper class is not
 * Serializable, so closures must not capture `this`.
 *
 * @param data the instances to save
 */
class LIBBLERDD(val data: RDD[Instance]) {
  /**
   * Save data to File System in LibSVM format
   * ("label i1:v1 i2:v2 ..." with 1-based indices over active entries).
   *
   * @param path output path
   * @deprecated
   */
  def saveAsLibSVMFile(path: String): Unit = {
    data.map { term =>
      val sb = new StringBuilder(term.label.toString)
      term.features.foreachActive((i, v) => sb ++= s" ${i + 1}:$v")
      sb.toString
    }.saveAsTextFile(path)
  }

  /**
   * Save data to File System in LIBBLE format: sparse vectors as
   * "label idx:value ..." lines, dense vectors as space-separated values.
   * The format is chosen by inspecting the first record.
   *
   * @param path output path
   */
  def saveAsLIBBLEFile(path: String): Unit = {
    data.first().features match {
      case _: SparseVector =>
        data.map { term =>
          val sb = new StringBuilder(term.label.toString)
          term.features.foreachActive((i, v) => sb ++= s" ${i + 1}:$v")
          sb.toString
        }.saveAsTextFile(path)
      case _: DenseVector =>
        data.map { term =>
          (term.label +: term.features.toArray).mkString(" ")
        }.saveAsTextFile(path)
    }
  }


}
182 |
183 |
184 |
--------------------------------------------------------------------------------
/src/main/scala/clustering/KMeans.scala:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
4 | * All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * You may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package libble.clustering
18 |
19 | import java.util
20 |
21 | import libble.linalg.implicits.vectorAdOps
22 | import libble.linalg.{DenseVector, Vector}
23 | import org.apache.spark.rdd.RDD
24 |
/**
 * KMeans Algorithm.
 *
 * Lloyd-style clustering over RDDs: each iteration assigns every point to
 * its nearest center (using precomputed norms to prune distance work) and
 * recomputes each center as the mean of its assigned points.
 */
class KMeans(
              private var k: Int,
              private var maxIters: Int,
              private var stopBound: Double = 0) extends Serializable {

  // Optional user-supplied initial centers as (center, norm2(center)) pairs.
  // @transient: only the driver needs them; they are re-broadcast per iteration.
  @transient
  private var initCenters: Option[Array[(Vector, Double)]] = None

  // Convenience constructor with the default of 100 iterations.
  def this(k: Int, stopBound: Double) = this(k, 100, stopBound)

  /**
   * set the number of clusters
   *
   * @param k number of clusters
   * @return this
   */
  def setK(k: Int): this.type = {
    this.k = k
    this
  }

  /**
   * set the Max Iter
   *
   * @param maxIters maximum number of iterations
   * @return this
   */
  def setMaxIters(maxIters: Int): this.type = {
    this.maxIters = maxIters
    this
  }

  /**
   * set the convergence bound
   *
   * @param stopBound iteration stops when no center moves at least this far
   * @return this
   */
  def setStopBound(stopBound: Double): this.type = {
    this.stopBound = stopBound
    this
  }

  /**
   * set the init Centers
   *
   * @param initCenters exactly k (center, norm2(center)) pairs
   * @return this
   */
  def setInitCenters(initCenters: Array[(Vector, Double)]): this.type = {
    require(initCenters.length == k)
    this.initCenters = Some(initCenters)
    this
  }

  /**
   * Do K-Means train
   *
   * @param data records keyed by an arbitrary id T with their feature vector
   * @tparam T key type (ignored by the algorithm)
   * @return (KMeansModel,cost) — cost is the accumulator value of the last
   *         iteration executed (summed squared distances of that pass)
   */
  def train[T](data: RDD[(T, Vector)]): (KMeansModel, Double) = {
    val centers = initCenters.getOrElse(initCenter(data))

    // Pair every vector with its 2-norm so findNearest can use the cheap
    // (|x| - |c|)^2 lower bound before the expensive dot product.
    val trainData = data.map(e => (e._2, e._2.norm2)).cache()
    val squareStopBound = stopBound * stopBound

    var isConvergence = false
    var i = 0
    // Driver-side accumulator of the per-iteration clustering cost.
    val costs = data.sparkContext.doubleAccumulator

    while (!isConvergence && i < maxIters) {
      costs.reset()
      val br_centers = data.sparkContext.broadcast(centers)

      // Per partition: accumulate member counts and vector sums per cluster,
      // then reduce to one (sum, count) per non-empty cluster on the driver.
      val res = trainData.mapPartitions { iter =>
        val counts = new Array[Int](k)
        util.Arrays.fill(counts, 0)
        val partSum = (0 until k).map(e => new DenseVector(br_centers.value(0)._1.size))

        iter.foreach { e =>
          val (index, cost) = KMeans.findNearest(e, br_centers.value)
          costs.add(cost)
          counts(index) += 1
          partSum(index) += e._1
        }
        // Emit only non-empty clusters; empty ones keep their old center.
        counts.indices.filter(j => counts(j) > 0).map(j => (j -> (partSum(j), counts(j)))).iterator
      }.reduceByKey { case ((s1, c1), (s2, c2)) =>
        (s1 += s2, c1 + c2)
      }.collectAsMap()
      br_centers.unpersist(false)


      println(s"cost at iter: $i is: ${costs.value}")
      // Assume convergence; any center moving >= stopBound flips this back.
      isConvergence = true
      res.foreach { case (index, (sum, count)) =>
        sum /= count // sum becomes the new center: the mean of its members
        val sumNorm2 = sum.norm2()
        // Squared distance between old and new center via norms and dot product.
        val squareDist = math.pow(centers(index)._2, 2.0) + math.pow(sumNorm2, 2.0) - 2 * (centers(index)._1 * sum)
        if (squareDist >= squareStopBound) {
          isConvergence = false
        }
        centers(index) = (sum, sumNorm2)
      }
      i += 1
    }
    (new KMeansModel(centers), costs.value)
  }


  // Sample k vectors (without replacement) as initial centers.
  // NOTE(review): `.distinct` may yield fewer than k centers when the sample
  // contains duplicate vectors — confirm downstream code tolerates that.
  private def initCenter[T](data: RDD[(T, Vector)]): Array[(Vector, Double)] = {
    data.takeSample(false, k, System.currentTimeMillis())
      .map(_._2).distinct.map(e => (e, e.norm2))
  }

  override def equals(other: Any): Boolean = other match {
    case that: KMeans =>
      (that canEqual this) &&
        initCenters == that.initCenters &&
        k == that.k &&
        maxIters == that.maxIters &&
        stopBound == that.stopBound
    case _ => false
  }

  def canEqual(other: Any): Boolean = other.isInstanceOf[KMeans]

  override def hashCode(): Int = {
    val state = Seq(initCenters, k, maxIters, stopBound)
    state.map(_.hashCode()).foldLeft(0)((a, b) => 31 * a + b)
  }
}
161 |
object KMeans {
  /**
   * Find the index of the nearest center to point e and the squared
   * distance to it. Points and centers are (vector, precomputed 2-norm)
   * pairs: (|x| - |c|)^2 is a lower bound on |x - c|^2, so the dot product
   * is skipped whenever that bound cannot beat the best distance so far.
   *
   * @param e       (vector, norm2) of the query point
   * @param centers candidate centers as (vector, norm2) pairs
   * @return (index of nearest center, squared distance to it)
   */
  def findNearest(e: (Vector, Double), centers: Array[(Vector, Double)]): (Int, Double) = {
    val (point, pointNorm) = e
    var best = Double.MaxValue
    var bestIdx = 0
    var i = 0
    while (i < centers.length) {
      val (centerVec, centerNorm) = centers(i)
      val normGap = pointNorm - centerNorm
      if (normGap * normGap < best) {
        val squareDist = pointNorm * pointNorm + centerNorm * centerNorm - 2 * (point * centerVec)
        if (squareDist < best) {
          best = squareDist
          bestIdx = i
        }
      }
      i += 1
    }
    (bestIdx, best)
  }
}
180 |
181 |
/**
 * Model holding the learned cluster centers as (vector, norm2) pairs.
 */
class KMeansModel(centers: Array[(Vector, Double)]) extends Serializable {

  /**
   * Assign every record to the index of its nearest center.
   *
   * @param data records keyed by an arbitrary id T with their feature vector
   * @return the same keys paired with their cluster index
   */
  def clustering[T](data: RDD[(T, Vector)]): RDD[(T, Int)] = {
    val br_center = data.sparkContext.broadcast(centers)
    data.map { case (key, vec) =>
      val nearest = KMeans.findNearest((vec, vec.norm2), br_center.value)
      (key, nearest._1)
    }
  }


}
195 |
196 |
197 |
198 |
199 |
200 |
--------------------------------------------------------------------------------
/src/main/scala/features/Scaller.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 |
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.features
16 |
17 | import libble.linalg.implicits.vectorAdOps
18 | import libble.linalg.{DenseVector, SparseVector, Vector}
19 | import org.apache.spark.rdd.RDD
20 | import scala.beans.BeanProperty
21 |
22 |
/**
 * With this class, we scale the data to standard normal space feature-wise:
 * optionally subtract the per-feature mean and/or divide by the per-feature
 * standard deviation.
 *
 * @param centerlized whether to subtract the feature-wise mean
 * @param scalStd     whether to divide by the feature-wise standard deviation
 */
class Scaller(var centerlized: Boolean = false, var scalStd: Boolean = true) extends Serializable {
  @BeanProperty var center: Option[Vector] = None
  @BeanProperty var std: Option[Vector] = None

  /**
   * Compute center and/or std of the data, depending on the configured flags.
   * Must be called before transform.
   *
   * @param data training data
   */
  def computeFactor(data: RDD[Vector]): Unit = (centerlized, scalStd) match {
    case (true, false) =>
      center = Some(computeCenter(data))
    case (true, true) =>
      center = Some(computeCenter(data))
      std = Some(computeVariance(data))
    case (false, true) =>
      std = Some(computeVariance(data))
    case (false, false) =>
      throw new IllegalArgumentException("you need not a scaller!!!")
  }

  /** Feature-wise mean of the data. */
  private def computeCenter(data: RDD[Vector]): Vector = {
    val n = data.first().size
    val (cum, num) = data.treeAggregate((new DenseVector(n), 0L))(
      seqOp = (c, v) => (c._1 += v, c._2 + 1),
      combOp = (c1, c2) => (c1._1 += c2._1, c1._2 + c2._2)
    )
    cum /= num
  }

  /**
   * Feature-wise standard deviation. When `centerlized` is set, the
   * previously computed center is subtracted before squaring; otherwise the
   * raw second moment is used.
   */
  private def computeVariance(data: RDD[Vector]): Vector =
    if (centerlized) {
      val cen = center.get
      val n = cen.size
      val (total, num) = data.treeAggregate((new DenseVector(n), 0))(
        seqOp = (c, v) => {
          val temp = v - cen
          temp.bitwisePow(2.0)
          (c._1 += temp, c._2 + 1)
        },
        combOp = (c1, c2) => (c1._1 += c2._1, c1._2 + c2._2)
      )
      total /= num
      total.bitwisePow(0.5)
    } else {
      val n = data.first().size
      val (total, num) = data.treeAggregate((new DenseVector(n), 0))(
        seqOp = (c, v) => {
          val temp = v.copy
          temp.bitwisePow(2.0)
          (c._1 += temp, c._2 + 1)
        },
        combOp = (c1, c2) => (c1._1 += c2._1, c1._2 + c2._2)
      )
      total /= num
      total.bitwisePow(0.5)
    }


  /**
   * Transform the data : RDD[Vector] with the factors.
   * computeFactor must have been called first.
   *
   * @param data data to transform
   * @return transformed data
   */
  def transform(data: RDD[Vector]): RDD[Vector] = {
    // Dense vectors are panned in place, sparse vectors via a new vector.
    val panning: (Vector => Vector) = data.first match {
      case dv: DenseVector => panningD
      case sv: SparseVector => panningS
    }

    (centerlized, scalStd) match {
      case (true, false) =>
        if (center.isDefined) {
          data.map(panning)
        } else {
          throw new IllegalAccessError("you should call computeFactor first!!!")
        }
      case (true, true) =>
        if (center.isDefined && std.isDefined) {
          data.map(panning).map(scaling)
        } else {
          throw new IllegalAccessError("you should call computeFactor first!!!")
        }
      case (false, true) =>
        if (std.isDefined) {
          data.map(scaling)
        } else {
          throw new IllegalAccessError("you should call computeFactor first!!!")
        }
      case (false, false) =>
        throw new IllegalArgumentException("you need not a scaller!!!")
    }
  }

  /**
   * Transform the data : Vector with the factors.
   * computeFactor must have been called first.
   */
  def transform(data: Vector): Vector = {
    val panning: (Vector => Vector) = data match {
      case sv: SparseVector => panningS
      case dv: DenseVector => panningD
    }

    (centerlized, scalStd) match {
      case (true, false) =>
        if (center.isDefined) {
          panning(data)
        } else {
          throw new IllegalAccessError("you should call computeFactor first!!!")
        }
      case (true, true) =>
        if (center.isDefined && std.isDefined) {
          // BUG FIX: scale the *panned* vector. The original called
          // panning(data) and then scaling(data): for a SparseVector the
          // panned copy was discarded, so the result was never centered.
          scaling(panning(data))
        } else {
          throw new IllegalAccessError("you should call computeFactor first!!!")
        }
      case (false, true) =>
        if (std.isDefined) {
          scaling(data)
        } else {
          throw new IllegalAccessError("you should call computeFactor first!!!")
        }
      case (false, false) =>
        throw new IllegalArgumentException("you need not a scaller!!!")
    }
  }

  // Sparse panning: subtraction builds a new vector; the input is untouched.
  private def panningS(vec: Vector): Vector = {
    vec - center.get
  }

  // Dense panning: subtracts the center in place and returns the same vector.
  private def panningD(vec: Vector): Vector = {
    vec -= center.get
  }

  /** Divide each (active) element by the matching std component, in place. */
  private def scaling(vec: Vector): Vector = {
    val s = std.get
    vec match {
      case de: DenseVector =>
        val eValues = de.values
        var offset = 0
        while (offset < eValues.length) {
          eValues(offset) /= s.apply(offset)
          offset += 1
        }
        de
      case se: SparseVector =>
        val eIndices = se.indices
        val eValues = se.values
        var offset = 0
        while (offset < eValues.length) {
          // s is indexed by the feature position, not the storage offset.
          eValues(offset) /= s.apply(eIndices(offset))
          offset += 1
        }
        se
    }
  }

}
212 |
--------------------------------------------------------------------------------
/src/main/scala/linalg/Vector.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.linalg
16 |
17 | import java.util
18 |
/**
 * This is the trait of Vector.
 *
 * Sealed: the only implementations are DenseVector and SparseVector below.
 */
sealed trait Vector extends Serializable {


  /**
   * Get the i-th element.
   *
   * @param i index into the logical (dense) coordinate space
   * @return double value at position i
   */
  def apply(i: Int): Double

  /** Length
   *
   * @return number of elements (logical dimension, not just stored entries)
   */
  def size: Int

  /**
   * Return a copy of this.
   *
   * @return new copy
   */
  def copy: Vector

  /**
   * Apply function on each stored (active) item: every element for a dense
   * vector, only the explicitly stored entries for a sparse one.
   *
   * @param f callback receiving (index, value)
   */
  def foreachActive(f: (Int, Double) => Unit)

  /**
   * Return the number of nonzero elements.
   *
   * @return nnz
   */
  def nnz: Int

  /**
   * Convert the vector to an array of length `size`.
   *
   * @return array
   */
  def toArray: Array[Double]


  // View of this instance as the Vector supertype.
  def vector: Vector

}
71 |
/**
 * Class of Dense Vector: every coordinate is stored in a flat array.
 *
 * @param values backing array; shared with the caller, not copied
 */
case class DenseVector(val values: Array[Double]) extends Vector {

  /**
   * Initialize a DenseVector with all elements zero.
   * (JVM double arrays are zero-initialized; the explicit Arrays.fill of
   * the original was redundant.)
   *
   * @param size number of elements
   */
  def this(size: Int) = this(new Array[Double](size))

  /**
   * Return the i-th element.
   *
   * @param i
   * @return double
   */
  override def apply(i: Int): Double = values(i)


  /**
   * Return a copy of this.
   *
   * @return new copy
   */
  override def copy: DenseVector = {
    new DenseVector(values.clone())
  }

  /**
   * Return a copy of this vector.
   *
   * @return copy
   */
  override def clone(): DenseVector = {
    copy
  }

  /**
   * Return the hashcode of this vector, mixing at most the first 7 elements.
   *
   * BUG FIX: the original looped unconditionally over offsets 0..6 and threw
   * ArrayIndexOutOfBoundsException for any vector with fewer than 7 elements.
   *
   * @return hash code
   */
  override def hashCode(): Int = {
    var code = 0
    var offset = 0
    val end = math.min(7, values.length)
    while (offset < end) {
      val bits = java.lang.Double.doubleToLongBits(values(offset))
      code = code * 13 + (bits ^ (bits >>> 32)).toInt
      offset += 1
    }
    code
  }

  /**
   * Return the number of nonzero elements.
   *
   * @return nnz
   */
  override def nnz: Int = {
    var num = 0
    var offset = 0
    while (offset < values.length) {
      if (values(offset) != 0)
        num += 1
      offset += 1
    }
    num
  }

  /** Length
   *
   * @return number of elements
   */
  override def size: Int = values.length

  /**
   * Convert the vector to an array (the backing array itself, not a copy).
   *
   * @return array
   */
  override def toArray: Array[Double] = values

  /**
   * Convert this vector to a string.
   *
   * @return string such as "[1.0,2.0]"
   */
  override def toString(): String = {
    values.mkString("[", ",", "]")
  }

  /**
   * Apply function on each item.
   *
   * @param f callback receiving (index, value)
   */
  override def foreachActive(f: (Int, Double) => Unit): Unit = {
    var offset = 0
    while (offset < size) {
      f(offset, values(offset))
      offset += 1
    }
  }

  override def vector: Vector = this
}
187 |
/**
 * Class of the Sparse Vector: only nonzero entries are stored as parallel
 * (index, value) arrays. Indices are assumed ascending — TODO confirm, the
 * linear scan in apply() relies on it.
 *
 * @param indices stored coordinate indices
 * @param values  stored coordinate values, aligned with indices
 * @param dim     logical dimension of the vector
 */
case class SparseVector(val indices: Array[Int], val values: Array[Double], dim: Int) extends Vector {
  require(indices.length == values.length && indices.length <= size, "length of indices doesn't match actual !")


  /**
   * Return the active size of element.
   *
   * @return number of explicitly stored entries
   */
  def activeSize: Int = indices.length

  /**
   * Get the i-th element of this vector, 0.0 if it is not stored.
   *
   * BUG FIX: the original scan had no bounds check and threw
   * ArrayIndexOutOfBoundsException when i was greater than every stored
   * index, or when the vector had no stored entries at all.
   *
   * @param i
   * @return double
   */
  override def apply(i: Int): Double = {
    var offset = 0
    while (offset < indices.length && indices(offset) < i) {
      offset += 1
    }
    if (offset < indices.length && indices(offset) == i) {
      values(offset)
    } else {
      0.0
    }
  }

  /**
   * Return a copy of this.
   *
   * @return new copy
   */
  override def copy(): SparseVector = {
    new SparseVector(indices.clone(), values.clone(), dim)
  }

  /**
   * Return a copy of this vector.
   *
   * @return copy
   */
  override def clone(): SparseVector = {
    copy()
  }

  /**
   * Return the hashcode of this vector, mixing at most the first 7 stored
   * entries (already bounds-checked).
   *
   * @return Int hashcode
   */
  override def hashCode(): Int = {
    var code = size * indices.length
    var offset = 0
    while (offset < values.size && offset < 7) {
      val bits = java.lang.Double.doubleToLongBits(values(offset))
      code = code * 13 + indices(offset) * (bits ^ (bits >>> 32)).toInt
      offset += 1
    }
    code
  }

  /**
   * Return the number of nonzero elements (stored zeros are not counted).
   *
   * @return nnz
   */
  override def nnz: Int = {
    var num = 0
    var offset = 0
    while (offset < values.length) {
      if (values(offset) != 0)
        num += 1
      offset += 1
    }
    num
  }

  /**
   * Length
   *
   * @return number of elements
   */
  override def size: Int = dim

  /**
   * Convert the vector to a dense array of length `size`.
   * (New double arrays are zero-initialized by the JVM; the original's
   * explicit Arrays.fill was redundant.)
   *
   * @return array
   */
  override def toArray: Array[Double] = {
    val data = new Array[Double](size)
    var offset = 0
    while (offset < activeSize) {
      data(indices(offset)) = values(offset)
      offset += 1
    }
    data
  }

  /**
   * Convert the vector to a string.
   *
   * @return string "dim,[indices],[values]"
   */
  override def toString: String = {
    s"$size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")}"
  }

  /**
   * Apply function on each stored item.
   *
   * @param f callback receiving (index, value)
   */
  override def foreachActive(f: (Int, Double) => Unit): Unit = {
    var offset = 0
    while (offset < activeSize) {
      f(indices(offset), values(offset))
      offset += 1
    }
  }

  override def vector: Vector = this
}
321 |
322 |
--------------------------------------------------------------------------------
/src/main/scala/dimReduction/GLS_Matrix_Batch.scala:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
3 | * All Rights Reserved.
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License. */
15 | package libble.dimReduction
16 |
17 | import java.util.Calendar
18 |
19 | import libble.linalg.implicits._
20 | import libble.linalg.{DenseVector, Vector}
21 | import org.apache.spark.rdd.RDD
22 |
23 | import scala.collection.mutable.ArrayBuffer
24 | import scala.util.Random
25 |
26 |
27 | /**
28 | *
29 | * This class is the Generalized Linear Algorithms for PCA model which uses mini-batch strategy during optimization process.
30 | *
31 | * @param stepSize
32 | * @param regParam
33 | * @param factor
34 | * @param iters
35 | * @param parts
36 | * @param batchSize
37 | * @param K
38 | */
39 | class GLS_Matrix_Batch(var stepSize: Double,
40 | var regParam: Double,
41 | var factor: Double,
42 | var iters: Int,
43 | var parts: Int,
44 | var batchSize: Int,
45 | var K: Int) extends Serializable {
46 | def this() = this(1.0, 0.0001, 0.0001, 5, 2, 1, 1)
47 |
48 | private[this] var stopBound: Double = 0.0
49 | var weightsVector: Option[Vector] = None
50 |
/**
 * Set the stop bound used by the convergence test.
 *
 * @param value new stop bound
 * @return this, for call chaining
 */
def setStopBound(value: Double): this.type = {
  this.stopBound = value
  this
}
61 |
/**
 * Train the model on training data, starting from K random directions:
 * each initial component is a Gaussian random vector normalized to unit length.
 *
 * @param input training data
 * @return principle components and loss array
 */
def train(input: RDD[Vector]): (Array[Vector], Array[Double]) = {
  val dims = input.first().size
  val initial: Array[Vector] = Array.tabulate(K) { _ =>
    val w: Vector = new DenseVector(Array.fill(dims)(Random.nextGaussian()))
    w /= w.norm2()
    w
  }
  train(input, initial)
}
82 |
83 |
/**
 * Train on training data with user-provided initial weights.
 * Repartitions the input to `parts` partitions if necessary
 * (parts == -1 keeps the input partitioning), then caches it.
 *
 * @param input     training data
 * @param initialWs initial principal-component estimates
 * @return principle components and loss array
 */
def train(input: RDD[Vector], initialWs: Array[Vector]): (Array[Vector], Array[Double]) = {
  if (parts == -1) parts = input.partitions.length
  val data =
    if (input.partitions.length == parts) input.cache()
    else input.coalesce(parts, true).cache()
  runEngine(data, initialWs)
}
101 |
102 |
/**
 * The PCA optimization engine: SVRG-style mini-batch power iteration.
 * Each outer iteration computes a full-data gradient-like term (mu), then
 * every partition runs local mini-batch updates with variance reduction,
 * and the per-partition results are averaged and re-orthonormalized.
 *
 * @param data      cached training data
 * @param initialWs initial component estimates (length K)
 * @return (final components, loss per outer iteration)
 */
private[this] def runEngine(data: RDD[Vector], initialWs: Array[Vector]): (Array[Vector], Array[Double]) = {

  val K = initialWs.length
  val count = data.count()
  // Work on copies so the caller's initialWs stays untouched.
  var weights = new Array[Vector](K)
  for (k <- 0 to K - 1)
    weights(k) = initialWs(k).copy
  val n = weights(0).size
  var convergenced = false

  val startTime = Calendar.getInstance().getTimeInMillis

  /**
   * outer loop
   */
  val lossArray = ArrayBuffer[Double]()
  var i = 0
  var time = 0l

  while (i < iters && !convergenced) {

    val w = data.context.broadcast(weights)
    // NOTE(review): this inner `var time` shadows the outer one declared
    // above, so the outer `time` is never read inside the loop — confirm
    // which timestamp was intended.
    var time = Calendar.getInstance().getTimeInMillis
    val temp = new Array[Vector](K)
    for (k <- 0 to K - 1)
      temp(k) = new DenseVector(n)

    // Full pass: mu(k) accumulates sum_v (v·w_k) v (per-component power-
    // iteration direction); lossTotal/diag track -sum (v·w_k)^2.
    val (mu, lossTotal, diag) = data.treeAggregate(temp, 0.0, new Array[Double](K))(
      seqOp = (c, v) => {
        var lossTemp = 0.0
        for (k <- 0 to K - 1) {
          val inner = v * w.value(k)
          val loss = -1.0 * inner * inner
          c._1(k).plusax(inner, v)
          c._3(k) += loss
          lossTemp += loss
        }
        (c._1, c._2 + lossTemp, c._3)
      },
      combOp = (c1, c2) => {
        for (k <- 0 to K - 1) {
          c2._1(k) += c1._1(k)
          c2._3(k) += c1._3(k)
        }
        (c2._1, c1._2 + c2._2, c2._3)
      }
    )
    for (k <- 0 to K - 1)
      mu(k) /= count.toDouble

    val loss = lossTotal / count.toDouble
    println(s"$loss ${time - startTime} ")
    for (k <- 0 to K - 1)
      println(diag(k) / count.toDouble)
    println()
    lossArray += loss


    val temp2 = new Array[Vector](K)
    for (k <- 0 to K - 1)
      temp2(k) = new DenseVector(n)

    // Snapshot of the outer weights for the variance-reduction correction.
    val w_0 = data.context.broadcast(weights)
    val weightsAll = data.mapPartitions({ iter =>
      // omiga: this partition's local copy of the components.
      val omiga = new Array[Vector](K)
      for (k <- 0 to K - 1)
        omiga(k) = w_0.value(k).copy
      // Materialize the partition so mini-batches can sample it randomly.
      val indexSeq = iter.toIndexedSeq
      val pNum = indexSeq.size

      /**
       * inner loop
       */
      for (j <- 1 to pNum / batchSize) {

        val delta = new Array[Vector](K)
        for (k <- 0 to K - 1)
          delta(k) = new DenseVector(n)

        // Sample batchSize points (with replacement) and accumulate the
        // correction term (e·omiga_k - e·w0_k) e for each component.
        for (b <- 1 to batchSize) {
          val e = indexSeq(Random.nextInt(pNum))
          for (k <- 0 to K - 1) {
            val f1 = e * omiga(k)
            val f2 = e * w_0.value(k)
            delta(k).plusax(f1 - f2, e)
          }
        }

        // Variance-reduced step: batch correction plus the full-data mu.
        for (k <- 0 to K - 1) {
          delta(k) /= batchSize
          delta(k) += mu(k)
          omiga(k).plusax(stepSize, delta(k))
        }

        // Keep the K components orthonormal after every batch.
        GramSchmidt(omiga)
      }
      Iterator(omiga)
    }, true)
      // Sum the per-partition solutions; divided by parts below = average.
      .treeAggregate(temp2)(seqOp = (c, w) => {
        for (k <- 0 to K - 1)
          c(k) += w(k)
        c
      }, combOp = { (w1, w2) =>
        for (k <- 0 to K - 1)
          w1(k) += w2(k)
        w1
      })

    for (k <- 0 to K - 1)
      weightsAll(k) /= parts.toDouble

    GramSchmidt(weightsAll)

    weights = weightsAll

    // Convergence needs at least 3 recorded losses before being checked.
    if (i >= 2)
      convergenced = isConvergenced(lossArray)
    i += 1
    time = Calendar.getInstance().getTimeInMillis
  }
  // NOTE(review): message says "last 10" but takeRight(5) prints five.
  println(s"losses of the last 10 iteration are:${lossArray.takeRight(5).mkString(",")}")

  (weights, lossArray.toArray)

}
235 |
236 | /**
237 | * Judge whether the convergence condition is satisfied.
238 | *
239 | * @param lossArray
240 | * @return Boolean
241 | */
242 | private[this] def isConvergenced(lossArray: ArrayBuffer[Double]): Boolean = {
243 | val len = lossArray.length
244 | (math.abs(lossArray(len - 1) - lossArray(len - 2)) < stopBound) && (lossArray(len - 1) < lossArray(len - 2))
245 | }
246 |
247 | /**
248 | *
249 | * This method is the implementation of GramSchmidt orthonormalization which is invoked in each inner loop.
250 | *
251 | * @param weights
252 | */
253 | def GramSchmidt(weights: Array[Vector]): Unit = {
254 | val beta = new Array[Vector](K)
255 | for (k <- 0 to K - 1) {
256 | weights(k) /= parts.toDouble
257 | beta(k) = weights(k).copy
258 | for (j <- 0 to k - 1) {
259 | val xishu = (beta(j) * weights(k)) / (beta(j) * beta(j))
260 | beta(k).plusax(-1.0 * xishu, beta(j))
261 | }
262 | }
263 | for (k <- 0 to K - 1) {
264 | val normk = beta(k).norm2()
265 | beta(k) /= normk
266 | weights(k) = beta(k).copy
267 | }
268 | }
269 |
270 | }
271 |
--------------------------------------------------------------------------------
/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/src/main/scala/collaborativeFiltering/MatrixFactorization.scala:
--------------------------------------------------------------------------------
1 | /*
2 | *
3 | * Copyright (c) 2016 LIBBLE team supervised by Dr. Wu-Jun LI at Nanjing University.
4 | * All Rights Reserved.
5 | * Licensed under the Apache License, Version 2.0 (the "License");
6 | * You may not use this file except in compliance with the License.
7 | * You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | *
16 | */
17 |
18 | /**
19 | * Created by syh on 2016/12/9.
20 | */
21 |
22 | package libble.collaborativeFiltering
23 |
24 | import libble.linalg.implicits._
25 | import libble.linalg.{DenseVector, Vector}
26 | import libble.utils.{XORShiftRandom, WorkerStore}
27 | import org.apache.spark.rdd.RDD
28 |
29 | import scala.collection.mutable.ArrayBuffer
30 | import scala.util.hashing.byteswap64
31 |
32 | case class Rating(rating: Double, index_x: Int, index_y: Int)
33 |
/**
 * This is an acceleration version of matrix factorization,
 * but it requires that numParts equals the actual number of machines:
 * each worker keeps its user factors in a node-local store keyed by
 * partition index, so the factors are only reachable from tasks scheduled
 * on the same machine.
 */
class MatrixFactorization extends Serializable{
  /**
   * initialize the user and item factors randomly
   *
   * Each factor is drawn from a Gaussian and normalized to unit L2 norm.
   *
   * @param indices user(item) indices
   * @param rank the length of factor
   * @return map from index to its initial factor vector
   */
  def initialize(indices: Set[Int], rank :Int) : Map[Int, Vector]= {
    val seedGen = new XORShiftRandom()
    val random = new XORShiftRandom(byteswap64(seedGen.nextLong()))
    val vectors = new Array[Vector](indices.size)
    for (i <- vectors.indices) {
      val factors = Array.fill(rank)(random.nextGaussian())
      val v = new DenseVector(factors)
      v /= v.norm2()
      vectors(i) = v
    }
    indices.zip(vectors).toMap
  }

  /**
   * Compute the regularized mean squared training loss over all ratings.
   *
   * Broadcasts the item factors, reads each partition's user factors from the
   * worker-local store under its partition-indexed key, and averages
   * (residual^2 + lambda_u*|u|^2 + lambda_v*|v|^2) over all ratings.
   *
   * @param ratingsByRow ratings partitioned by row
   * @param itemFactors  current item factors (driver-side map)
   * @param lambda_u     regularization parameter of users
   * @param lambda_v     regularization parameter of items
   * @param numRatings   total number of ratings
   * @return the regularized mean loss
   */
  private def evaluateLoss(ratingsByRow: RDD[Rating],
                           itemFactors: Map[Int, Vector],
                           lambda_u: Double,
                           lambda_v: Double,
                           numRatings: Long): Double = {
    val bc_itemFactors = ratingsByRow.context.broadcast(itemFactors)
    val loss = ratingsByRow.mapPartitionsWithIndex { (index, iter) =>
      val localV = bc_itemFactors.value
      // user factors live only in this worker's store, keyed by partition index
      val localU = MatrixFactorization.workerstore.get[Map[Int, Vector]](s"userFactors_$index")
      val reguV = localV.mapValues(v => lambda_v * v.dot(v))
      val reguU = localU.mapValues(u => lambda_u * u.dot(u))
      val ls = iter.foldLeft(0.0) { (l, r) =>
        val uh = localU.get(r.index_x).get
        val vj = localV.get(r.index_y).get
        val residual = r.rating - uh.dot(vj)
        l + residual * residual + reguU.get(r.index_x).get + reguV.get(r.index_y).get
      }
      Iterator.single(ls)
    }.reduce(_ + _) / numRatings
    bc_itemFactors.unpersist()
    loss
  }

  /**
   * This is an acceleration version of matrix factorization,
   * but it requires that numParts equals the actual number of machines.
   *
   * @param trainSet RDD of ratings
   * @param numIters number of outer loop
   * @param numParts number of workers
   * @param rank length of factor
   * @param lambda_u regularization parameter of users
   * @param lambda_v regularization parameter of items
   * @param stepSize stepsize for update the factors.
   * @param ifPrintLoss print the exact training loss each iteration when 1
   * @return matrix factorization model
   */
  def train (trainSet: RDD[Rating],
             numIters: Int,
             numParts: Int,
             rank: Int,
             lambda_u: Double,
             lambda_v: Double,
             stepSize: Double,
             ifPrintLoss: Int) : MatrixFactorizationModel = {
    var stepsize = stepSize
    // collect the full set of item indices on the driver
    val items = trainSet.mapPartitions{iter =>
      val is = iter.map(r => r.index_y).toSet
      Iterator.single(is)
    }
      .reduce((a,b)=> a.union(b))
    val numRatings = trainSet.count()
    //random hash the data by row so all ratings of one user land in one partition
    val ratingsByRow = trainSet.groupBy(_.index_x)
      .repartition(numParts)
      .values
      .flatMap(i=>i)
      .cache()
    //number of inner iterations is the maximum number of ratings in p workers
    val numInnerIters = ratingsByRow.mapPartitions(i => Iterator.single(i.length)).reduce((a,b)=>math.max(a,b))

    //initialize item factors in master
    var itemFactors = initialize(items, rank)
    //initialize U in p workers, stored node-locally under the partition index
    ratingsByRow.mapPartitionsWithIndex{(index,iter) =>
      val indices_x = iter.map(r => r.index_x).toSet
      val userFactors = initialize(indices_x,rank)
      MatrixFactorization.workerstore.put(s"userFactors_$index", userFactors)
      Iterator.single(0)
    }.count()
    //main loop
    val startTime = System.currentTimeMillis()
    val lossList = new ArrayBuffer[Double]()
    var testTime = 0L
    var i = 0
    while (i < numIters){
      if(ifPrintLoss == 1){
        //exact training loss; evaluation time is excluded from the reported time
        val testTimeStart = System.currentTimeMillis()
        val loss = evaluateLoss(ratingsByRow, itemFactors, lambda_u, lambda_v, numRatings)
        print(s"$loss\t")
        testTime += (System.currentTimeMillis() - testTimeStart)
        println(s"${System.currentTimeMillis() - testTime - startTime}")
      }
      //broadcast V to p workers
      val bc_itemFactors = ratingsByRow.context.broadcast(itemFactors)
      //each worker i does the two SGD sweeps in parallel
      val (newItemFactors, lossSum) = ratingsByRow.mapPartitionsWithIndex{case(index,iter) =>
        val localRatings = iter.toArray
        val numLocalRatings = localRatings.length
        val localV = bc_itemFactors.value
        val localU = MatrixFactorization.workerstore.get[Map[Int, Vector]](s"userFactors_$index")
        val seedGen = new XORShiftRandom()
        val random = new XORShiftRandom(byteswap64(seedGen.nextLong() ^ index))
        var loss = 0.0
        //inner loop: update user factors with item factors fixed
        for(_ <- 1 to numInnerIters){
          //randomly select an instance r_h,k from R_i
          val ranRating = localRatings(random.nextInt(numLocalRatings))
          val uh = localU.get(ranRating.index_x).get
          val vj = localV.get(ranRating.index_y).get
          //update uh in place
          val residual = ranRating.rating - uh.dot(vj)
          uh *= (1- stepsize * lambda_u)
          uh.plusax(stepsize * residual, vj)
        }
        //inner loop: update this worker's copy of the item factors
        for(_ <- 1 to numInnerIters){
          //randomly select an instance r_h,k from R_i
          val ranRating = localRatings(random.nextInt(numLocalRatings))
          val uh = localU.get(ranRating.index_x).get
          val vj = localV.get(ranRating.index_y).get
          //update vj in place: vj aliases an entry of the local broadcast copy
          val residual = ranRating.rating - uh.dot(vj)
          vj *= (1 - stepsize * lambda_v)
          vj.plusax(stepsize * residual, uh)
          loss += (residual * residual)
        }
        Iterator.single((bc_itemFactors.value, loss))
      }
        .reduce { (a, b) =>
          //sum the locally updated item factors over workers
          val temp = a._1
          b._1.foreach{case (i, v) =>
            v.plusax(1.0, temp.get(i).get)
          }
          (b._1, a._2 + b._2)
        }
      itemFactors = newItemFactors
      //average the item factors over the workers
      itemFactors.foreach(ui => ui._2 /= numParts.toDouble)
      bc_itemFactors.unpersist()

      val approxLoss = lossSum / (numParts * numInnerIters)
      //step-size adaptation: halve on loss increase, grow slightly otherwise
      if (i != 0) {
        val oldLoss = lossList.last
        if (approxLoss > oldLoss)
          stepsize = stepsize * 0.5
        else
          stepsize *= 1.05
      }
      lossList.append(approxLoss)

      // println(s"approximate loss: $approxLoss, time: ${System.currentTimeMillis() - startTime}")

      i += 1
    }
    val trainOver = System.currentTimeMillis()
    val loss = evaluateLoss(ratingsByRow, itemFactors, lambda_u, lambda_v, numRatings)
    println(s"loss: $loss\t")
    println(s"cputime of training process(ms): ${ trainOver - startTime }")

    //pull the user factors back out of the worker-local stores
    val userFactorsRDD = ratingsByRow.mapPartitionsWithIndex{(index,iter) =>
      val factors = MatrixFactorization.workerstore.get[Map[Int, Vector]](s"userFactors_$index")
      factors.toIterator
    }.cache()
    val itemFactorsRDD = ratingsByRow.context.parallelize(itemFactors.toSeq, numParts).cache()
    new MatrixFactorizationModel(rank, userFactorsRDD, itemFactorsRDD)
  }
}
223 |
object MatrixFactorization {
  // Node-local store shared by all tasks on an executor; holds each
  // partition's user factors between Spark stages (keys s"userFactors_$index").
  val workerstore = new WorkerStore()
}
227 |
--------------------------------------------------------------------------------