├── .gitignore
├── src
└── main
│ ├── resources
│ ├── META-INF
│ │ └── MANIFEST.MF
│ └── ansj_library.properties
│ └── scala
│ └── com
│ └── lenovo
│ └── ml
│ ├── Word2Vector.scala
│ ├── XGBoostInference.scala
│ ├── DataPreprocess.scala
│ └── XGBoostTrain.scala
├── .idea
├── copyright
│ └── profiles_settings.xml
├── encodings.xml
├── vcs.xml
├── modules.xml
├── artifacts
│ └── xgbspark_text_classification_jar.xml
├── libraries
│ ├── Maven__oro_oro_2_0_8.xml
│ ├── Maven__junit_junit_4_12.xml
│ ├── Maven__org_tukaani_xz_1_0.xml
│ ├── Maven__antlr_antlr_2_7_7.xml
│ ├── Maven__log4j_log4j_1_2_17.xml
│ ├── Maven__org_antlr_ST4_4_0_4.xml
│ ├── Maven__xmlenc_xmlenc_0_52.xml
│ ├── Maven__stax_stax_api_1_0_1.xml
│ ├── Maven__javax_transaction_jta_1_1.xml
│ ├── Maven__net_sf_py4j_py4j_0_10_4.xml
│ ├── Maven__org_apache_ivy_ivy_2_4_0.xml
│ ├── Maven__javax_jdo_jdo_api_3_0_1.xml
│ ├── Maven__net_jpountz_lz4_lz4_1_3_0.xml
│ ├── Maven__org_ansj_ansj_seg_5_1_2.xml
│ ├── Maven__org_apache_avro_avro_1_7_7.xml
│ ├── Maven__org_iq80_snappy_snappy_0_2.xml
│ ├── Maven__com_google_inject_guice_3_0.xml
│ ├── Maven__net_sf_opencsv_opencsv_2_3.xml
│ ├── Maven__org_jodd_jodd_core_3_5_2.xml
│ ├── Maven__org_nlpcn_nlp_lang_1_7_2.xml
│ ├── Maven__commons_io_commons_io_2_4.xml
│ ├── Maven__joda_time_joda_time_2_9_3.xml
│ ├── Maven__net_razorvine_pyrolite_4_13.xml
│ ├── Maven__org_objenesis_objenesis_2_1.xml
│ ├── Maven__aopalliance_aopalliance_1_0.xml
│ ├── Maven__com_google_guava_guava_14_0_1.xml
│ ├── Maven__commons_cli_commons_cli_1_2.xml
│ ├── Maven__commons_net_commons_net_2_2.xml
│ ├── Maven__io_netty_netty_3_8_0_Final.xml
│ ├── Maven__javax_inject_javax_inject_1.xml
│ ├── Maven__org_scala_lang_scalap_2_11_0.xml
│ ├── Maven__org_slf4j_slf4j_api_1_7_16.xml
│ ├── Maven__javolution_javolution_5_5_1.xml
│ ├── Maven__org_antlr_antlr_runtime_3_4.xml
│ ├── Maven__org_jpmml_pmml_model_1_2_15.xml
│ ├── Maven__com_ning_compress_lzf_1_0_3.xml
│ ├── Maven__com_twitter_chill_2_11_0_8_0.xml
│ ├── Maven__com_twitter_chill_java_0_8_0.xml
│ ├── Maven__commons_dbcp_commons_dbcp_1_4.xml
│ ├── Maven__commons_lang_commons_lang_2_6.xml
│ ├── Maven__ml_dmlc_xgboost4j_spark_0_7.xml
│ ├── Maven__net_java_dev_jets3t_jets3t_0_7_1.xml
│ ├── Maven__org_apache_avro_avro_ipc_1_7_7.xml
│ ├── Maven__org_codehaus_janino_janino_3_0_0.xml
│ ├── Maven__org_jpmml_pmml_schema_1_2_15.xml
│ ├── Maven__org_scalanlp_breeze_2_11_0_12.xml
│ ├── Maven__com_esotericsoftware_minlog_1_3_0.xml
│ ├── Maven__org_hamcrest_hamcrest_core_1_3.xml
│ ├── Maven__com_github_fommil_netlib_core_1_1_2.xml
│ ├── Maven__org_apache_derby_derby_10_10_2_0.xml
│ ├── Maven__org_apache_thrift_libfb303_0_9_3.xml
│ ├── Maven__org_slf4j_jul_to_slf4j_1_7_16.xml
│ ├── Maven__org_spire_math_spire_2_11_0_7_4.xml
│ ├── Maven__com_github_rwl_jtransforms_2_4_0.xml
│ ├── Maven__commons_pool_commons_pool_1_5_4.xml
│ ├── Maven__org_antlr_antlr4_runtime_4_5_3.xml
│ ├── Maven__org_antlr_stringtemplate_3_2_1.xml
│ ├── Maven__org_apache_thrift_libthrift_0_9_3.xml
│ ├── Maven__org_slf4j_slf4j_log4j12_1_7_16.xml
│ ├── Maven__com_jolbox_bonecp_0_8_0_RELEASE.xml
│ ├── Maven__commons_codec_commons_codec_1_10.xml
│ ├── Maven__org_spark_project_spark_unused_1_0_0.xml
│ ├── Maven__com_chuusai_shapeless_2_11_2_0_0.xml
│ ├── Maven__com_google_code_findbugs_jsr305_1_3_9.xml
│ ├── Maven__io_netty_netty_all_4_0_42_Final.xml
│ ├── Maven__org_apache_avro_avro_ipc_tests_1_7_7.xml
│ ├── Maven__org_slf4j_jcl_over_slf4j_1_7_16.xml
│ ├── Maven__com_clearspring_analytics_stream_2_7_0.xml
│ ├── Maven__org_apache_commons_commons_math_2_1.xml
│ ├── Maven__org_apache_hadoop_hadoop_auth_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_hdfs_2_2_0.xml
│ ├── Maven__org_apache_zookeeper_zookeeper_3_4_5.xml
│ ├── Maven__org_glassfish_hk2_hk2_api_2_4_0_b34.xml
│ ├── Maven__org_javassist_javassist_3_18_1_GA.xml
│ ├── Maven__org_mortbay_jetty_jetty_util_6_1_26.xml
│ ├── Maven__javax_ws_rs_javax_ws_rs_api_2_0_1.xml
│ ├── Maven__org_apache_commons_commons_lang3_3_5.xml
│ ├── Maven__org_json4s_json4s_ast_2_11_3_2_11.xml
│ ├── Maven__com_googlecode_javaewah_JavaEWAH_0_3_2.xml
│ ├── Maven__org_scala_lang_scala_library_2_11_8.xml
│ ├── Maven__org_scala_lang_scala_reflect_2_11_7.xml
│ ├── Maven__com_esotericsoftware_kryo_shaded_3_0_3.xml
│ ├── Maven__com_thoughtworks_paranamer_paranamer_2_3.xml
│ ├── Maven__log4j_apache_log4j_extras_1_2_17.xml
│ ├── Maven__org_apache_avro_avro_mapred_hadoop2_1_7_7.xml
│ ├── Maven__org_json4s_json4s_core_2_11_3_2_11.xml
│ ├── Maven__commons_digester_commons_digester_1_8.xml
│ ├── Maven__io_dropwizard_metrics_metrics_jvm_3_1_2.xml
│ ├── Maven__org_apache_hadoop_hadoop_client_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_common_2_2_0.xml
│ ├── Maven__org_apache_httpcomponents_httpcore_4_4_4.xml
│ ├── Maven__org_apache_spark_spark_sql_2_11_2_1_1.xml
│ ├── Maven__org_glassfish_hk2_hk2_utils_2_4_0_b34.xml
│ ├── Maven__org_scala_lang_scala_compiler_2_11_0.xml
│ ├── Maven__org_xerial_snappy_snappy_java_1_1_2_6.xml
│ ├── Maven__commons_logging_commons_logging_1_1_3.xml
│ ├── Maven__org_apache_commons_commons_math3_3_4_1.xml
│ ├── Maven__org_roaringbitmap_RoaringBitmap_0_5_11.xml
│ ├── Maven__org_scalanlp_breeze_macros_2_11_0_12.xml
│ ├── Maven__com_google_protobuf_protobuf_java_2_5_0.xml
│ ├── Maven__com_univocity_univocity_parsers_2_2_1.xml
│ ├── Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml
│ ├── Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml
│ ├── Maven__javax_servlet_javax_servlet_api_3_1_0.xml
│ ├── Maven__org_apache_commons_commons_crypto_1_0_0.xml
│ ├── Maven__org_apache_curator_curator_client_2_4_0.xml
│ ├── Maven__org_apache_parquet_parquet_column_1_8_1.xml
│ ├── Maven__org_apache_parquet_parquet_common_1_8_1.xml
│ ├── Maven__org_apache_parquet_parquet_hadoop_1_8_1.xml
│ ├── Maven__org_apache_spark_spark_core_2_11_2_1_1.xml
│ ├── Maven__org_apache_spark_spark_hive_2_11_2_1_1.xml
│ ├── Maven__org_apache_spark_spark_tags_2_11_2_1_1.xml
│ ├── Maven__org_apache_xbean_xbean_asm5_shaded_4_4.xml
│ ├── Maven__org_apache_hadoop_hadoop_yarn_api_2_2_0.xml
│ ├── Maven__org_apache_httpcomponents_httpclient_4_5_2.xml
│ ├── Maven__org_apache_spark_spark_mllib_2_11_2_1_1.xml
│ ├── Maven__org_glassfish_hk2_hk2_locator_2_4_0_b34.xml
│ ├── Maven__org_spire_math_spire_macros_2_11_0_7_4.xml
│ ├── Maven__org_apache_curator_curator_recipes_2_4_0.xml
│ ├── Maven__org_apache_parquet_parquet_jackson_1_8_1.xml
│ ├── Maven__org_datanucleus_datanucleus_core_3_2_10.xml
│ ├── Maven__org_datanucleus_datanucleus_rdbms_3_2_9.xml
│ ├── Maven__org_json4s_json4s_jackson_2_11_3_2_11.xml
│ ├── Maven__commons_httpclient_commons_httpclient_3_1.xml
│ ├── Maven__org_apache_commons_commons_compress_1_4_1.xml
│ ├── Maven__org_apache_parquet_parquet_encoding_1_8_1.xml
│ ├── Maven__org_apache_spark_spark_graphx_2_11_2_1_1.xml
│ ├── Maven__org_apache_spark_spark_sketch_2_11_2_1_1.xml
│ ├── Maven__org_apache_spark_spark_unsafe_2_11_2_1_1.xml
│ ├── Maven__com_twitter_parquet_hadoop_bundle_1_6_0.xml
│ ├── Maven__commons_beanutils_commons_beanutils_1_7_0.xml
│ ├── Maven__org_codehaus_janino_commons_compiler_3_0_0.xml
│ ├── Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml
│ ├── Maven__org_scala_lang_modules_scala_xml_2_11_1_0_1.xml
│ ├── Maven__com_fasterxml_jackson_core_jackson_core_2_6_5.xml
│ ├── Maven__javax_annotation_javax_annotation_api_1_2.xml
│ ├── Maven__org_apache_curator_curator_framework_2_4_0.xml
│ ├── Maven__org_datanucleus_datanucleus_api_jdo_3_2_6.xml
│ ├── Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml
│ ├── Maven__net_hydromatic_eigenbase_properties_1_1_5.xml
│ ├── Maven__net_sourceforge_f2j_arpack_combined_all_0_1.xml
│ ├── Maven__org_apache_hadoop_hadoop_annotations_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_yarn_client_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_yarn_common_2_2_0.xml
│ ├── Maven__org_apache_spark_spark_catalyst_2_11_2_1_1.xml
│ ├── Maven__org_apache_spark_spark_launcher_2_11_2_1_1.xml
│ ├── Maven__org_codehaus_jackson_jackson_core_asl_1_9_13.xml
│ ├── Maven__org_glassfish_jersey_core_jersey_client_2_22_2.xml
│ ├── Maven__org_glassfish_jersey_core_jersey_common_2_22_2.xml
│ ├── Maven__org_glassfish_jersey_core_jersey_server_2_22_2.xml
│ ├── Maven__org_spark_project_hive_hive_exec_1_2_1_spark2.xml
│ ├── Maven__javax_validation_validation_api_1_1_0_Final.xml
│ ├── Maven__org_apache_spark_spark_streaming_2_11_2_1_1.xml
│ ├── Maven__commons_collections_commons_collections_3_2_1.xml
│ ├── Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_13.xml
│ ├── Maven__org_glassfish_hk2_osgi_resource_locator_1_0_1.xml
│ ├── Maven__com_fasterxml_jackson_core_jackson_databind_2_6_5.xml
│ ├── Maven__commons_configuration_commons_configuration_1_6.xml
│ ├── Maven__org_apache_spark_spark_mllib_local_2_11_2_1_1.xml
│ ├── Maven__org_glassfish_hk2_external_javax_inject_2_4_0_b34.xml
│ ├── Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml
│ ├── Maven__org_apache_calcite_calcite_core_1_2_0_incubating.xml
│ ├── Maven__org_glassfish_jersey_media_jersey_media_jaxb_2_22_2.xml
│ ├── Maven__org_spark_project_hive_hive_metastore_1_2_1_spark2.xml
│ ├── Maven__com_fasterxml_jackson_core_jackson_annotations_2_6_5.xml
│ ├── Maven__org_apache_spark_spark_network_common_2_11_2_1_1.xml
│ ├── Maven__org_apache_hadoop_hadoop_yarn_server_common_2_2_0.xml
│ ├── Maven__org_apache_spark_spark_network_shuffle_2_11_2_1_1.xml
│ ├── Maven__org_apache_calcite_calcite_linq4j_1_2_0_incubating.xml
│ ├── Maven__org_apache_parquet_parquet_format_2_3_0_incubating.xml
│ ├── Maven__org_apache_calcite_calcite_avatica_1_2_0_incubating.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_app_2_2_0.xml
│ ├── Maven__org_glassfish_jersey_bundles_repackaged_jersey_guava_2_22_2.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_core_2_2_0.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_common_2_2_0.xml
│ ├── Maven__com_fasterxml_jackson_module_jackson_module_paranamer_2_6_5.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_shuffle_2_2_0.xml
│ ├── Maven__org_glassfish_hk2_external_aopalliance_repackaged_2_4_0_b34.xml
│ ├── Maven__com_fasterxml_jackson_module_jackson_module_scala_2_11_2_6_5.xml
│ ├── Maven__org_scala_lang_modules_scala_parser_combinators_2_11_1_0_1.xml
│ ├── Maven__org_apache_hadoop_hadoop_mapreduce_client_jobclient_2_2_0.xml
│ ├── Maven__org_glassfish_jersey_containers_jersey_container_servlet_2_22_2.xml
│ └── Maven__org_glassfish_jersey_containers_jersey_container_servlet_core_2_22_2.xml
├── misc.xml
├── compiler.xml
└── uiDesigner.xml
├── README.md
├── pom.xml
├── LICENSE
└── xgbspark-text-classification.iml
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | *.log
3 |
--------------------------------------------------------------------------------
/src/main/resources/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Main-Class: com.lenovo.ml.XGBoostTrain
3 |
4 |
--------------------------------------------------------------------------------
/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/artifacts/xgbspark_text_classification_jar.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | $PROJECT_DIR$/out/artifacts/xgbspark_text_classification_jar
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__oro_oro_2_0_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__junit_junit_4_12.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_tukaani_xz_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__antlr_antlr_2_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__log4j_log4j_1_2_17.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_antlr_ST4_4_0_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__xmlenc_xmlenc_0_52.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__stax_stax_api_1_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_transaction_jta_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_sf_py4j_py4j_0_10_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_ivy_ivy_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Features
2 |
3 | * Data Source: `Hive`
4 | * Word Segmentation: `Ansj`
5 | * Feature Engineering: `NGram + TF-IDF` or `Pre-Trained Word2Vec`
6 | * Classification Algorithm: `XGBoost`
7 | * Model Training: `Spark Pipeline`
8 | * Model Selection and Tuning: `Cross Validation + Grid Search`
9 |
10 | ## Environments
11 |
12 | * [Spark](http://spark.apache.org) 2.1.1
13 | * [Hive](https://hive.apache.org) 1.2.1
14 | * [XGBoost4J-Spark](https://github.com/dmlc/xgboost/tree/master/jvm-packages) 0.7
15 | * [Ansj](https://github.com/NLPchina/ansj_seg) 5.1.2
16 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_jdo_jdo_api_3_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_jpountz_lz4_lz4_1_3_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_ansj_ansj_seg_5_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_1_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_iq80_snappy_snappy_0_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_inject_guice_3_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_sf_opencsv_opencsv_2_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_jodd_jodd_core_3_5_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_nlpcn_nlp_lang_1_7_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_io_commons_io_2_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__joda_time_joda_time_2_9_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_razorvine_pyrolite_4_13.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_objenesis_objenesis_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/src/main/resources/ansj_library.properties:
--------------------------------------------------------------------------------
1 | #path of userLibrary; this is the default library
2 | #dic=library/default.dic
3 |
4 | #path of crfModel
5 | #crf_dic1=library/crf.model
6 |
7 | #stop_dic1=library/stop.dic
8 |
9 | #redress (ambiguity and synonym) dictionary file paths
10 | #ambiguityLibrary=library/ambiguity.dic
11 | #synonymsLibrary=library/synonyms.dic
12 |
13 | #set real name
14 | isRealName=true
15 |
16 | #isNameRecognition default true
17 | isNameRecognition=true
18 |
19 | #isNumRecognition default true
20 | isNumRecognition=true
21 |
22 | #isQuantifierRecognition (merge numbers with quantifiers) default true
23 | isQuantifierRecognition=true
--------------------------------------------------------------------------------
/.idea/libraries/Maven__aopalliance_aopalliance_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_guava_guava_14_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_net_commons_net_2_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_netty_netty_3_8_0_Final.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_inject_javax_inject_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scalap_2_11_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_16.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javolution_javolution_5_5_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_antlr_antlr_runtime_3_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_jpmml_pmml_model_1_2_15.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_ning_compress_lzf_1_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_twitter_chill_2_11_0_8_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_twitter_chill_java_0_8_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_dbcp_commons_dbcp_1_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_lang_commons_lang_2_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__ml_dmlc_xgboost4j_spark_0_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_java_dev_jets3t_jets3t_0_7_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_ipc_1_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_janino_janino_3_0_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_jpmml_pmml_schema_1_2_15.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scalanlp_breeze_2_11_0_12.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_esotericsoftware_minlog_1_3_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_github_fommil_netlib_core_1_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_derby_derby_10_10_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_thrift_libfb303_0_9_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_jul_to_slf4j_1_7_16.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spire_math_spire_2_11_0_7_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_github_rwl_jtransforms_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_pool_commons_pool_1_5_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_antlr_antlr4_runtime_4_5_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_antlr_stringtemplate_3_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_thrift_libthrift_0_9_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_slf4j_log4j12_1_7_16.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_jolbox_bonecp_0_8_0_RELEASE.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_codec_commons_codec_1_10.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spark_project_spark_unused_1_0_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_chuusai_shapeless_2_11_2_0_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_netty_netty_all_4_0_42_Final.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_ipc_tests_1_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_jcl_over_slf4j_1_7_16.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_clearspring_analytics_stream_2_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_auth_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_hdfs_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_zookeeper_zookeeper_3_4_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_hk2_api_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_javassist_javassist_3_18_1_GA.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_mortbay_jetty_jetty_util_6_1_26.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_ws_rs_javax_ws_rs_api_2_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_json4s_json4s_ast_2_11_3_2_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_googlecode_javaewah_JavaEWAH_0_3_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scala_library_2_11_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scala_reflect_2_11_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_esotericsoftware_kryo_shaded_3_0_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_thoughtworks_paranamer_paranamer_2_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__log4j_apache_log4j_extras_1_2_17.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_mapred_hadoop2_1_7_7.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_json4s_json4s_core_2_11_3_2_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_jvm_3_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_client_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_common_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_httpcomponents_httpcore_4_4_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_sql_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_hk2_utils_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_scala_compiler_2_11_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_xerial_snappy_snappy_java_1_1_2_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_logging_commons_logging_1_1_3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_math3_3_4_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_roaringbitmap_RoaringBitmap_0_5_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scalanlp_breeze_macros_2_11_0_12.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_protobuf_protobuf_java_2_5_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_univocity_univocity_parsers_2_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_servlet_javax_servlet_api_3_1_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_crypto_1_0_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_curator_curator_client_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_column_1_8_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_common_1_8_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_hadoop_1_8_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_core_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_hive_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_tags_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_xbean_xbean_asm5_shaded_4_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_api_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_httpcomponents_httpclient_4_5_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_mllib_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_hk2_locator_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spire_math_spire_macros_2_11_0_7_4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_curator_curator_recipes_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_jackson_1_8_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_datanucleus_datanucleus_core_3_2_10.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_datanucleus_datanucleus_rdbms_3_2_9.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_json4s_json4s_jackson_2_11_3_2_11.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_compress_1_4_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_encoding_1_8_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_graphx_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_sketch_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_unsafe_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_twitter_parquet_hadoop_bundle_1_6_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_janino_commons_compiler_3_0_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_modules_scala_xml_2_11_1_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_core_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_annotation_javax_annotation_api_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_curator_curator_framework_2_4_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_datanucleus_datanucleus_api_jdo_3_2_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_hydromatic_eigenbase_properties_1_1_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_sourceforge_f2j_arpack_combined_all_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_annotations_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_client_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_common_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_catalyst_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_launcher_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_13.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_core_jersey_client_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_core_jersey_common_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_core_jersey_server_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spark_project_hive_hive_exec_1_2_1_spark2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_validation_validation_api_1_1_0_Final.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_streaming_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_13.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_osgi_resource_locator_1_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_mllib_local_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_external_javax_inject_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_calcite_calcite_core_1_2_0_incubating.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_media_jersey_media_jaxb_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spark_project_hive_hive_metastore_1_2_1_spark2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_annotations_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_network_common_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_yarn_server_common_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_spark_spark_network_shuffle_2_11_2_1_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_calcite_calcite_linq4j_1_2_0_incubating.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_parquet_parquet_format_2_3_0_incubating.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_calcite_calcite_avatica_1_2_0_incubating.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_app_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_bundles_repackaged_jersey_guava_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_core_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_common_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_module_jackson_module_paranamer_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_shuffle_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_hk2_external_aopalliance_repackaged_2_4_0_b34.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_fasterxml_jackson_module_jackson_module_scala_2_11_2_6_5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_modules_scala_parser_combinators_2_11_1_0_1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_hadoop_hadoop_mapreduce_client_jobclient_2_2_0.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_containers_jersey_container_servlet_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_glassfish_jersey_containers_jersey_container_servlet_core_2_22_2.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/.idea/compiler.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/src/main/scala/com/lenovo/ml/Word2Vector.scala:
--------------------------------------------------------------------------------
1 | package com.lenovo.ml
2 |
3 | /**
4 | * Created by YangChenguang on 2017/10/17.
5 | */
6 | import org.apache.spark.sql.SparkSession
7 | import DataPreprocess.segWords
8 | import org.apache.spark.ml.feature._
9 | import org.apache.spark.ml.Pipeline
10 |
/**
 * Spark job that fits a tokenizer -> stop-word remover -> Word2Vec pipeline on
 * segmented text and saves the fitted pipeline.
 *
 * Positional arguments:
 *   0 - Hive table to read; rows with a non-null `text` column form the corpus
 *   1 - stop-word list path
 *   2 - user dictionary path
 *   3 - synonym list path
 *   4 - single-character whitelist path
 *   5 - Word2Vec vector size (integer)
 *   6 - output path for the fitted pipeline model
 */
object Word2Vector {
  // Number of positional arguments read by main (indices 0..6).
  private val ExpectedArgCount = 7

  def main(args: Array[String]): Unit = {
    // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException later on.
    require(args.length >= ExpectedArgCount,
      s"Word2Vector expects $ExpectedArgCount arguments but got ${args.length}")
    // Parse the numeric argument before starting Spark so a bad value is reported immediately.
    val vectorSize = args(5).toInt

    // 1. Create the Spark entry point
    val sparkSession = SparkSession.builder().appName("Word2Vector").enableHiveSupport().getOrCreate()

    try {
      // 2. Read the corpus, then clean and segment the text
      val tableName = args(0)
      val matrix = sparkSession.sql("SELECT text FROM " + tableName + " where text is not null")
      val words = segWords(sparkSession, args(1), args(2), args(3), args(4), matrix).repartition(6).cache()

      // 3. Data preparation: split the space-joined "words" column and drop stop words
      val tokenizer = new RegexTokenizer().setInputCol("words").setOutputCol("wordsArray")
      val remover = new StopWordsRemover().setInputCol("wordsArray").setOutputCol("filteredWords")

      // 4. Train the Word2Vec model
      val word2Vec = new Word2Vec()
        .setInputCol("filteredWords")
        .setOutputCol("features")
        .setStepSize(0.025)
        .setNumPartitions(1)
        .setMaxIter(1)
        .setMaxSentenceLength(1000)
        .setWindowSize(5)
        .setVectorSize(vectorSize)
        .setMinCount(10)
        .setSeed(12345L)
      val pipeline = new Pipeline().setStages(Array(tokenizer, remover, word2Vec))
      // Named so that it no longer shadows the imported Word2VecModel class.
      val fittedPipeline = pipeline.fit(words)

      // 5. Save the model
      fittedPipeline.write.save(args(6))
    } finally {
      // Stop the session even when a step above throws.
      sparkSession.stop()
    }
  }
}
37 |
--------------------------------------------------------------------------------
/src/main/scala/com/lenovo/ml/XGBoostInference.scala:
--------------------------------------------------------------------------------
1 | package com.lenovo.ml
2 |
3 | /**
4 | * Created by YangChenguang on 2017/9/15.
5 | */
6 | import org.apache.spark.sql.{Row, SparkSession}
7 | import org.apache.spark.sql.types.StructType
8 | import DataPreprocess.segWords
9 | import org.apache.spark.ml.PipelineModel
10 |
/**
 * Spark job that applies a saved feature pipeline and a saved XGBoost pipeline
 * to the rows of a Hive table and writes the predictions as text.
 *
 * Positional arguments:
 *   0 - Hive table to score; must contain a `text` column
 *   1 - stop-word list path
 *   2 - user dictionary path
 *   3 - synonym list path
 *   4 - single-character whitelist path
 *   5 - path of the saved feature-engineering PipelineModel
 *   6 - path of the saved classification PipelineModel
 *   7 - output path for the predictions
 */
object XGBoostInference {
  // Number of positional arguments read by main (indices 0..7).
  private val ExpectedArgCount = 8

  def main(args: Array[String]): Unit = {
    // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException later on.
    require(args.length >= ExpectedArgCount,
      s"XGBoostInference expects $ExpectedArgCount arguments but got ${args.length}")

    // 1. Create the Spark entry point
    val sparkSession = SparkSession.builder().appName("XGBoostInference").enableHiveSupport().getOrCreate()

    try {
      // 2. Read the data to score, then clean and segment the text
      val tableName = args(0)
      val matrix = sparkSession.sql("SELECT * FROM " + tableName)
      val words = segWords(sparkSession, args(1), args(2), args(3), args(4), matrix.select("text"))

      // 3. Join the original rows with their segmentation result.
      // NOTE(review): RDD.zip pairs rows positionally, so this relies on `matrix` producing
      // the same rows in the same order both times it is evaluated - confirm for the source table.
      val rows = matrix.rdd.zip(words.rdd).map {
        case (rowLeft, rowRight) => Row.fromSeq(rowLeft.toSeq ++ rowRight.toSeq)
      }
      val schema = StructType(matrix.schema.fields ++ words.schema.fields)
      val matrixMerge = sparkSession.createDataFrame(rows, schema)

      // 4. Build the feature vectors.
      // transform() is invoked here on the driver, so the loaded model is used directly
      // rather than being wrapped in a broadcast variable and immediately unwrapped.
      val featureModel = PipelineModel.read.load(args(5))
      val dataPrepared = featureModel.transform(matrixMerge).repartition(18).cache()

      // 5. Load the classification model and produce the predictions
      val xgbModel = PipelineModel.read.load(args(6))
      val prediction = xgbModel.transform(dataPrepared)

      // 6. Write the predictions to HDFS as a single text file
      prediction.select("text", "predictedLabel", "probabilities").rdd.coalesce(1).saveAsTextFile(args(7))
    } finally {
      // Stop the session even when a step above throws.
      sparkSession.stop()
    }
  }
}
42 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | com.lenovo.ml
8 | xgbspark-text-classification
9 | 1.0-SNAPSHOT
10 |
11 |
12 | UTF-8
13 | 2.1.1
14 | 2.11
15 | 2.6.4
16 |
17 |
18 |
19 |
20 | org.apache.spark
21 | spark-core_${scala.version}
22 | ${spark.version}
23 |
24 |
25 | org.apache.spark
26 | spark-sql_${scala.version}
27 | ${spark.version}
28 |
29 |
30 | org.apache.spark
31 | spark-hive_${scala.version}
32 | ${spark.version}
33 |
34 |
35 | org.apache.spark
36 | spark-mllib_${scala.version}
37 | ${spark.version}
38 |
39 |
40 | org.nlpcn
41 | nlp-lang
42 | 1.7.2
43 |
44 |
45 | org.ansj
46 | ansj_seg
47 | 5.1.2
48 |
49 |
50 | ml.dmlc
51 | xgboost4j-spark
52 | 0.7
53 |
54 |
55 | junit
56 | junit
57 | 4.12
58 |
59 |
60 |
61 |
62 | src/main/scala
63 | src/test/scala
64 |
65 |
66 |
67 | org.apache.maven.plugins
68 | maven-compiler-plugin
69 | 3.3
70 |
71 | 1.8
72 | 1.8
73 | UTF-8
74 |
75 |
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/src/main/scala/com/lenovo/ml/DataPreprocess.scala:
--------------------------------------------------------------------------------
1 | package com.lenovo.ml
2 |
3 | import org.apache.spark.sql.{SparkSession, DataFrame, Dataset}
4 | import scala.collection.mutable
5 | import scala.util.matching.Regex
6 | import org.ansj.library.DicLibrary
7 | import org.ansj.recognition.impl.StopRecognition
8 | import org.ansj.splitWord.analysis.DicAnalysis
9 |
10 | /**
11 | * Created by YangChenguang on 2017/12/27.
12 | */
/**
 * Text cleaning and word segmentation shared by the training and inference jobs.
 */
object DataPreprocess {
  /**
   * Lower-cases each row's text and strips time-like tokens, number+unit expressions,
   * URLs and e-mail addresses from it.
   *
   * Each row is rendered with `Row.toString` and its first and last characters
   * (the surrounding brackets of that rendering) are dropped before cleaning.
   *
   * @param sparkSession session supplying the implicit encoders
   * @param rawText      DataFrame holding the text to clean
   * @return one cleaned string per input row
   */
  def textCleaner(sparkSession: SparkSession, rawText: DataFrame): Dataset[String] = {
    // Time-like tokens such as "12:30:45"
    val timePattern = new Regex("""[-—0-9a-z]+[:]+[0-9a-z]+[:]?""")
    // Digits followed by a year / month / day / days / number / times unit character
    val dateCountPattern = new Regex("""[0-9]+年|[0-9]+月|[0-9]+[日]|[0-9]+[天]|[0-9]+[号]|[0-9]+[次]""")
    // URLs
    val urlPattern = new Regex("""http[s]?://[a-z0-9./?=_-]+""")
    // E-mail addresses
    val emailPattern = new Regex("""[0-9_a-z]+([-+.][0-9_a-z]+)*@[0-9_a-z]+([-.][0-9_a-z]+)*\.[0-9_a-z]+([-.][0-9_a-z]+)*""")
    // Applied strictly in this order; earlier removals change what later patterns can match.
    val patterns = List(timePattern, dateCountPattern, urlPattern, emailPattern)

    import sparkSession.implicits._
    // A single pass per row instead of one Dataset.map per cleaning step.
    rawText.map { row =>
      val rendered = row.toString
      val lowered = rendered.substring(1, rendered.length - 1).toLowerCase
      patterns.foldLeft(lowered)((text, pattern) => pattern.replaceAllIn(text, ""))
    }
  }

  /**
   * Cleans the text with [[textCleaner]] and segments it with ansj's DicAnalysis.
   *
   * Tokens are trimmed; tokens of length 1 are kept only when they appear in the
   * single-character whitelist; tokens with a synonym entry are replaced by their
   * mapped value. The surviving tokens of each row are joined with single spaces.
   *
   * @param sparkSession     active Spark session
   * @param stopWordsPath    text file with one stop word per line
   * @param dictionaryPath   text file with one user-dictionary word per line
   * @param synonymWordsPath text file with one "word replacement" pair per line, split on
   *                         the first space; lines without a space are ignored
   * @param singleWordsPath  text file with one whitelisted single-character word per line
   * @param rawText          DataFrame holding the text to segment
   * @return DataFrame with a single string column named "words"
   */
  def segWords(sparkSession: SparkSession, stopWordsPath: String, dictionaryPath: String, synonymWordsPath: String,
               singleWordsPath: String, rawText: DataFrame): DataFrame = {
    val sc = sparkSession.sparkContext
    // The word lists are only ever consumed on the driver, so read each file exactly once.
    def readLines(path: String): Array[String] = sc.textFile(path).collect()

    val filter = new StopRecognition()
    // Part-of-speech natures whose tokens are dropped
    filter.insertStopNatures("w","ns","nr","t","r","u","e","y","o")
    // Load the stop-word list
    val stopWordList = readLines(stopWordsPath)
    stopWordList.foreach { line => filter.insertStopWords(line) }

    // Load the user dictionary; stop words are inserted into the same DEFAULT dictionary
    readLines(dictionaryPath).foreach { line => DicLibrary.insert(DicLibrary.DEFAULT, line) }
    stopWordList.foreach { line => DicLibrary.insert(DicLibrary.DEFAULT, line) }

    // Build the synonym table. Lines that do not split into two parts (blank lines,
    // lines without a space) are skipped instead of failing on a missing second field.
    // As before, a later entry for the same word overrides an earlier one.
    val synonymMap: Map[String, String] = readLines(synonymWordsPath).iterator
      .map(_.split(" ", 2))
      .collect { case Array(word, replacement) => word -> replacement }
      .toMap

    // Build the single-character whitelist
    val singleWhiteList: Set[String] = readLines(singleWordsPath).toSet

    // Ship the lookup structures to the executors as broadcast variables
    val stop = sc.broadcast(filter)
    val dic = sc.broadcast(DicLibrary.get(DicLibrary.DEFAULT))
    val synonym = sc.broadcast(synonymMap)
    val single = sc.broadcast(singleWhiteList)

    // Clean the text, then segment it
    import sparkSession.implicits._
    textCleaner(sparkSession, rawText).map { text =>
      val parse = DicAnalysis.parse(text, dic.value).recognition(stop.value)
      // Keep only the token text, without its part-of-speech tag
      val words = for (i <- Range(0, parse.size())) yield parse.get(i).getName
      words.map(_.trim)
        .filter(word => word.length > 1 || single.value.contains(word))
        .map(word => synonym.value.getOrElse(word, word))
        .mkString(" ")
    }.toDF("words")
  }
}
67 |
--------------------------------------------------------------------------------
/src/main/scala/com/lenovo/ml/XGBoostTrain.scala:
--------------------------------------------------------------------------------
1 | package com.lenovo.ml
2 |
3 | /**
4 | * Created by YangChenguang on 2017/9/14.
5 | */
6 | import org.apache.spark.SparkException
7 | import org.apache.spark.sql.types.StructType
8 | import org.apache.spark.sql.{DataFrame, Row, SparkSession}
9 | import DataPreprocess.segWords
10 | import scala.collection.mutable
11 | import org.apache.spark.ml.feature._
12 | import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostEstimator}
13 | import org.apache.spark.ml.{Pipeline, PipelineModel}
14 | import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
15 | import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit, TrainValidationSplitModel}
16 |
/**
 * Spark job that trains an XGBoost text classifier on TF-IDF or Word2Vec features,
 * tunes it with a parameter grid, and writes the model, test predictions, accuracy
 * and model parameters.
 *
 * Positional arguments:
 *   0  - Hive table with the training data; `text` and `label` columns are used
 *   1  - stop-word list path
 *   2  - user dictionary path
 *   3  - synonym list path
 *   4  - single-character whitelist path
 *   5  - feature algorithm, "tfidf" or "word2vec" (case-insensitive)
 *   6  - feature pipeline path (written for tfidf, read for word2vec)
 *   7  - fraction of the data held out as the test set
 *   8  - number of classes
 *   9  - output path for the best classification PipelineModel
 *   10 - output path for the test-set predictions
 *   11 - output path for the accuracy
 *   12 - output path for the model parameters
 */
object XGBoostTrain {
  // Number of positional arguments read by main (indices 0..12).
  private val ExpectedArgCount = 13
  // Used both as the XGBoost "nworkers" value and as the partition count of the train/test sets.
  private val NumWorkers = 18

  /**
   * Fits a 1/2/3-gram TF-IDF feature pipeline on `dataMatrix`, saves it to `savePath`,
   * and returns `dataMatrix` transformed by the saved pipeline.
   *
   * @param sparkSession kept for interface compatibility; not needed by the implementation
   * @param dataMatrix   DataFrame with a space-separated "words" column
   * @param savePath     path the fitted PipelineModel is written to
   * @return `dataMatrix` with the assembled "features" column added
   */
  def featureEngineeringTFIDF(sparkSession: SparkSession, dataMatrix: DataFrame, savePath: String): DataFrame = {
    // Build the n-grams
    val tokenizer = new RegexTokenizer().setInputCol("words").setOutputCol("wordsArray")
    val remover = new StopWordsRemover().setInputCol("wordsArray").setOutputCol("filteredWords")
    val nGram2 = new NGram().setN(2).setInputCol("filteredWords").setOutputCol("gram-2")
    val nGram3 = new NGram().setN(3).setInputCol("filteredWords").setOutputCol("gram-3")

    // Compute TF-IDF
    val countVectorizer_1gram = new CountVectorizer().setInputCol("filteredWords")
    val countVectorizer_2gram = new CountVectorizer().setInputCol("gram-2")
    val countVectorizer_3gram = new CountVectorizer().setInputCol("gram-3")
    val idf_1gram = new IDF().setInputCol(countVectorizer_1gram.getOutputCol).setOutputCol("tfidf-1gram").setMinDocFreq(10)
    val idf_2gram = new IDF().setInputCol(countVectorizer_2gram.getOutputCol).setOutputCol("tfidf-2gram").setMinDocFreq(10)
    val idf_3gram = new IDF().setInputCol(countVectorizer_3gram.getOutputCol).setOutputCol("tfidf-3gram").setMinDocFreq(10)
    val assembler = new VectorAssembler().setInputCols(Array("tfidf-1gram", "tfidf-2gram", "tfidf-3gram")).setOutputCol("features")

    // Assemble the feature vector
    val pipeline = new Pipeline().setStages(Array(tokenizer, remover, nGram2, nGram3, countVectorizer_1gram,
      countVectorizer_2gram, countVectorizer_3gram, idf_1gram, idf_2gram, idf_3gram, assembler))
    pipeline.fit(dataMatrix).write.save(savePath)
    // Transform with the model as read back from disk. transform() runs on the driver,
    // so the loaded model is used directly rather than through a broadcast variable.
    PipelineModel.read.load(savePath).transform(dataMatrix)
  }

  /**
   * Loads a previously saved Word2Vec pipeline from `savePath` and applies it to `dataMatrix`.
   *
   * @param sparkSession kept for interface compatibility; not needed by the implementation
   * @param dataMatrix   DataFrame to transform
   * @param savePath     path of the saved PipelineModel
   * @return `dataMatrix` transformed by the loaded pipeline
   */
  def featureEngineeringWord2Vec(sparkSession: SparkSession, dataMatrix: DataFrame, savePath: String): DataFrame = {
    PipelineModel.read.load(savePath).transform(dataMatrix)
  }

  /**
   * Grid-searches eta, round and maxDepth for an XGBoost pipeline.
   *
   * Despite the name, model selection uses a TrainValidationSplit with a 0.9 train
   * ratio (a single train/validation split) rather than k-fold cross validation.
   *
   * @param xgboostParam fixed XGBoost parameters
   * @param labelIndexer fitted indexer whose labels map predictions back to label strings
   * @param evaluator    metric used to pick the best parameter combination
   * @param trainingData data with "labelIndex" and "features" columns
   * @return the fitted TrainValidationSplitModel
   */
  def crossValidation(xgboostParam: Map[String, Any], labelIndexer: StringIndexerModel,
                      evaluator: MulticlassClassificationEvaluator, trainingData: DataFrame): TrainValidationSplitModel = {
    // XGBoost pipeline: estimator followed by index -> label string conversion
    val xgbEstimator = new XGBoostEstimator(xgboostParam).setLabelCol("labelIndex").setFeaturesCol("features").setPredictionCol("prediction")
    val labelConverter = new IndexToString().setInputCol("prediction").setOutputCol("predictedLabel").setLabels(labelIndexer.labels)
    val pipeline = new Pipeline().setStages(Array(xgbEstimator, labelConverter))

    // Parameter grid + train/validation split
    val paramGrid = new ParamGridBuilder()
      .addGrid(xgbEstimator.eta, Array(0.08, 0.1))
      .addGrid(xgbEstimator.round, Array(50, 100))
      .addGrid(xgbEstimator.maxDepth, Array(300, 500))
      .build()
    val crossValidator = new TrainValidationSplit()
      .setEstimator(pipeline)
      .setEvaluator(evaluator)
      .setEstimatorParamMaps(paramGrid)
      .setTrainRatio(0.9)
    crossValidator.fit(trainingData)
  }

  def main(args: Array[String]): Unit = {
    // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException later on.
    require(args.length >= ExpectedArgCount,
      s"XGBoostTrain expects $ExpectedArgCount arguments but got ${args.length}")

    // 1. Create the Spark entry point
    val sparkSession = SparkSession.builder().appName("XGBoostTrain").enableHiveSupport().getOrCreate()

    try {
      // 2. Read the training data, then clean and segment the text
      val tableName = args(0)
      val matrix = sparkSession.sql("SELECT * FROM " + tableName + " where text is not null")
      val words = segWords(sparkSession, args(1), args(2), args(3), args(4), matrix.select("text"))

      // 3. Join the original rows with their segmentation result.
      // NOTE(review): RDD.zip pairs rows positionally, so this relies on `matrix` producing
      // the same rows in the same order both times it is evaluated - confirm for the source table.
      val rows = matrix.rdd.zip(words.rdd).map {
        case (rowLeft, rowRight) => Row.fromSeq(rowLeft.toSeq ++ rowRight.toSeq)
      }
      val schema = StructType(matrix.schema.fields ++ words.schema.fields)
      val matrixMerge = sparkSession.createDataFrame(rows, schema)

      // 4. Build the feature vectors
      val featuredData = args(5).toLowerCase match {
        case "tfidf" => featureEngineeringTFIDF(sparkSession, matrixMerge, args(6))
        case "word2vec" => featureEngineeringWord2Vec(sparkSession, matrixMerge, args(6))
        case _ => throw new SparkException("Feature engineering algorithm must be TFIDF or Word2Vec")
      }

      // 5. Convert the label to a numeric index
      val labelIndexer = new StringIndexer().setInputCol("label").setOutputCol("labelIndex").fit(featuredData)
      val dataPrepared = labelIndexer.transform(featuredData).select("text", "features", "label", "labelIndex")

      // 6. Split into training and test data by the requested ratio
      val testSize = args(7).toDouble
      val splits = dataPrepared.randomSplit(Array(1 - testSize, testSize), seed = 12345L)
      val trainingData = splits(0).repartition(NumWorkers).cache()
      val testData = splits(1).repartition(NumWorkers).cache()

      // 7. Define how the model is evaluated
      val evaluator = new MulticlassClassificationEvaluator()
        .setLabelCol("labelIndex")
        .setPredictionCol("prediction")
        .setMetricName("accuracy") // was "precision" before Spark 2.0

      // 8. Set the fixed model parameters and train the XGBoost text classifier.
      // eta, the number of rounds and max depth are not set here; they come from the
      // parameter grid in crossValidation.
      val paramMap: Map[String, Any] = Map(
        "nworkers" -> NumWorkers,
        "use_external_memory" -> false,
        "min_child_weight" -> 3,
        "alpha" -> 0.01,
        "gamma" -> 0,
        "subsample" -> 0.8,
        "colsample_bytree" -> 0.8,
        "scale_pos_weight" -> 1,
        "num_class" -> args(8).toInt,
        "objective" -> "multi:softprob",
        "numEarlyStoppingRounds" -> 0,
        "trainTestRatio" -> 0.9,
        "booster" -> "dart",
        "rate_drop" -> 0.1,
        "skip_drop" -> 0.5,
        "seed" -> 12345L
      )
      val cvModel = crossValidation(paramMap, labelIndexer, evaluator, trainingData)

      // 9. Save the best classification model and load it back.
      // The reloaded model is used directly on the driver; no broadcast variable is needed.
      val bestPipelineModel = cvModel.bestModel.asInstanceOf[PipelineModel]
      bestPipelineModel.write.save(args(9))
      val xgbModelTrained = PipelineModel.read.load(args(9))

      // 10. Classify the test set with the trained model
      val prediction = xgbModelTrained.transform(testData)

      // 11. Evaluate the model
      prediction.select("text", "label", "predictedLabel", "probabilities").rdd.coalesce(1).saveAsTextFile(args(10))
      val accuracy = evaluator.evaluate(prediction)
      sparkSession.sparkContext.parallelize(List("Accuracy = " + accuracy)).coalesce(1).saveAsTextFile(args(11))

      // 12. Save the model parameters; stage 0 of the pipeline is the XGBoost model
      val modelTrainingStage = xgbModelTrained.stages(0).asInstanceOf[XGBoostClassificationModel]
      sparkSession.sparkContext.makeRDD(modelTrainingStage.extractParamMap().toSeq).coalesce(1).saveAsTextFile(args(12))
    } finally {
      // Stop the session even when a step above throws.
      sparkSession.stop()
    }
  }
}
153 |
--------------------------------------------------------------------------------
/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | -
6 |
7 |
8 | -
9 |
10 |
11 | -
12 |
13 |
14 | -
15 |
16 |
17 | -
18 |
19 |
20 |
21 |
22 |
23 | -
24 |
25 |
26 |
27 |
28 |
29 | -
30 |
31 |
32 |
33 |
34 |
35 | -
36 |
37 |
38 |
39 |
40 |
41 | -
42 |
43 |
44 |
45 |
46 | -
47 |
48 |
49 |
50 |
51 | -
52 |
53 |
54 |
55 |
56 | -
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 |
65 |
66 | -
67 |
68 |
69 |
70 |
71 | -
72 |
73 |
74 | -
75 |
76 |
77 |
78 |
79 | -
80 |
81 |
82 |
83 |
84 | -
85 |
86 |
87 |
88 |
89 | -
90 |
91 |
92 |
93 |
94 | -
95 |
96 |
97 |
98 |
99 | -
100 |
101 |
102 | -
103 |
104 |
105 | -
106 |
107 |
108 | -
109 |
110 |
111 | -
112 |
113 |
114 |
115 |
116 | -
117 |
118 |
119 | -
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/xgbspark-text-classification.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
--------------------------------------------------------------------------------