├── .gitignore
├── .idea
    ├── .name
    ├── compiler.xml
    ├── copyright
    │   └── profiles_settings.xml
    ├── encodings.xml
    ├── libraries
    │   ├── Maven__com_clearspring_analytics_stream_2_7_0.xml
    │   ├── Maven__com_google_code_findbugs_jsr305_1_3_9.xml
    │   ├── Maven__com_google_code_gson_gson_2_2_4.xml
    │   ├── Maven__com_google_protobuf_protobuf_java_2_5_0.xml
    │   ├── Maven__com_ning_compress_lzf_1_0_3.xml
    │   ├── Maven__com_sun_jersey_jersey_client_1_9.xml
    │   ├── Maven__com_sun_jersey_jersey_core_1_9.xml
    │   ├── Maven__com_sun_xml_bind_jaxb_core_2_2_11.xml
    │   ├── Maven__com_sun_xml_bind_jaxb_impl_2_2_11.xml
    │   ├── Maven__commons_beanutils_commons_beanutils_1_7_0.xml
    │   ├── Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml
    │   ├── Maven__commons_cli_commons_cli_1_2.xml
    │   ├── Maven__commons_configuration_commons_configuration_1_6.xml
    │   ├── Maven__commons_digester_commons_digester_1_8.xml
    │   ├── Maven__commons_httpclient_commons_httpclient_3_1.xml
    │   ├── Maven__commons_io_commons_io_2_4.xml
    │   ├── Maven__commons_logging_commons_logging_1_1_3.xml
    │   ├── Maven__commons_net_commons_net_2_2.xml
    │   ├── Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml
    │   ├── Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml
    │   ├── Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml
    │   ├── Maven__io_dropwizard_metrics_metrics_jvm_3_1_2.xml
    │   ├── Maven__javax_servlet_servlet_api_2_5.xml
    │   ├── Maven__javax_xml_bind_jaxb_api_2_2_2.xml
    │   ├── Maven__javax_xml_stream_stax_api_1_0_2.xml
    │   ├── Maven__junit_junit_4_11.xml
    │   ├── Maven__log4j_log4j_1_2_17.xml
    │   ├── Maven__net_jpountz_lz4_lz4_1_3_0.xml
    │   ├── Maven__org_apache_avro_avro_ipc_1_7_7.xml
    │   ├── Maven__org_apache_avro_avro_ipc_tests_1_7_7.xml
    │   ├── Maven__org_apache_avro_avro_mapred_hadoop2_1_7_7.xml
    │   ├── Maven__org_apache_camel_camel_core_2_17_0.xml
    │   ├── Maven__org_apache_camel_camel_test_2_17_0.xml
    │   ├── Maven__org_apache_commons_commons_compress_1_4_1.xml
    │   ├── Maven__org_apache_commons_commons_math3_3_4_1.xml
    │   ├── Maven__org_apache_directory_api_api_asn1_api_1_0_0_M20.xml
    │   ├── Maven__org_apache_directory_api_api_util_1_0_0_M20.xml
    │   ├── Maven__org_apache_directory_server_apacheds_i18n_2_0_0_M15.xml
    │   ├── Maven__org_apache_directory_server_apacheds_kerberos_codec_2_0_0_M15.xml
    │   ├── Maven__org_apache_ivy_ivy_2_4_0.xml
    │   ├── Maven__org_codehaus_jackson_jackson_jaxrs_1_9_13.xml
    │   ├── Maven__org_codehaus_jackson_jackson_xc_1_9_13.xml
    │   ├── Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml
    │   ├── Maven__org_hamcrest_hamcrest_core_1_3.xml
    │   ├── Maven__org_mortbay_jetty_jetty_util_6_1_26.xml
    │   ├── Maven__org_scala_lang_modules_scala_xml_2_11_1_0_4.xml
    │   ├── Maven__org_slf4j_slf4j_api_1_7_13.xml
    │   ├── Maven__org_slf4j_slf4j_log4j12_1_7_13.xml
    │   ├── Maven__org_spark_project_spark_unused_1_0_0.xml
    │   ├── Maven__org_tukaani_xz_1_0.xml
    │   ├── Maven__oro_oro_2_0_8.xml
    │   ├── Maven__xerces_xercesImpl_2_9_1.xml
    │   ├── Maven__xml_apis_xml_apis_1_3_04.xml
    │   └── Maven__xmlenc_xmlenc_0_52.xml
    ├── misc.xml
    ├── modules.xml
    ├── scala_compiler.xml
    └── uiDesigner.xml
├── BigDataLearning.iml
├── META-INF
    └── MANIFEST.MF
├── RandomPrefix.txt
├── SampleJoin1.txt
├── SampleJoin2.txt
├── mapjoin.txt
├── mapjoin1.txt
├── pom.xml
├── readme.md
└── src
    └── main
        ├── resources
            ├── META-INF
            │   └── MANIFEST.MF
            ├── core-site.xml
            └── log4j.properties
        └── scala
            └── com
                └── mobin
                    ├── Advanced_Analytics_with_Spark
                        ├── NaStatCounter.scala
                        └── Patient.scala
                    ├── Example
                        ├── AudienceAnalysis.scala
                        ├── GenerateHFile.java
                        ├── HiveDataBaseConnection.java
                        ├── PutDataToHBase.java
                        ├── RandomPrefix_Shuffle.scala
                        ├── Sample_Shuffle.scala
                        ├── ScoresDataGenerator.scala
                        ├── SecondSortBykey.scala
                        ├── SexCount.java
                        ├── SparkJoin.scala
                        └── StudentDataGenerator.scala
                    ├── HDFS
                        ├── FSUtils
                        │   ├── CountFileLine.java
                        │   └── FSUtils.java
                        ├── HDFSCompressionCodec.java
                        ├── LzoCompress.java
                        └── WriteToHDFS.scala
                    ├── Kafka
                        ├── KStream
                        │   └── KStreamDemo.java
                        ├── Partition
                        │   └── StockPartitionor.java
                        ├── Producers
                        │   ├── KafkaProducerThread.java
                        │   ├── QuotationProducer.java
                        │   └── StockQuotationInfo.java
                        ├── Topic.java
                        └── consumers
                        │   ├── KafkaConsumerThread.java
                        │   ├── QuotationConsumer.java
                        │   └── QuotationConsumerManualCommit.java
                    ├── SparkRDDFun
                        └── TransFormation
                        │   ├── Action
                        │       ├── Aggregate.scala
                        │       ├── Fold.scala
                        │       ├── Func.scala
                        │       └── KVFunc.scala
                        │   ├── BaseRDD
                        │       ├── Cartesian.scala
                        │       ├── Coalesce.scala
                        │       ├── Distinct.scala
                        │       ├── FlatMap.scala
                        │       ├── Glom.scala
                        │       ├── MakeRDD.scala
                        │       ├── Map.scala
                        │       ├── MapPartitions.scala
                        │       ├── MapPartitionsWithIndex.scala
                        │       ├── RandomSplit.scala
                        │       ├── Sample.scala
                        │       ├── Union.scala
                        │       └── ZipWithIndex.scala
                        │   └── KVRDD
                        │       ├── AggregateAndFold.scala
                        │       ├── Cogroup.scala
                        │       ├── CombineByKey.scala
                        │       ├── CombineByKey1.scala
                        │       ├── FlatMapValus.scala
                        │       ├── FoldByKey.scala
                        │       ├── GroupByKey.scala
                        │       ├── Join.scala
                        │       ├── MapJoinJava.java
                        │       ├── MapSideJoin.scala
                        │       ├── MapValues.scala
                        │       ├── PartitionBy.scala
                        │       ├── ReduceByKey.scala
                        │       └── SortByKey.scala
                    ├── SparkSQL
                        ├── PeopleDemo.scala
                        ├── RowNumber.scala
                        └── SGC_LET_SHOOL_HOUR.scala
                    ├── Telecom
                        └── AirPlaneMode.scala
                    └── sparkStreaming
                        ├── FileStreaming.scala
                        ├── Flume
                            ├── SampleLogGenerator.java
                            ├── ScalaLoadDistributedEvents.scala
                            ├── ScalaLogAnalyzerJson.scala
                            ├── ScalaLogAnalyzerMap.scala
                            ├── ScalaQueryingStreams.scala
                            └── ScalaTransformLogEvents.scala
                        ├── GenerateChar.scala
                        ├── Kafka
                            └── UserBehaviorMsgProducer.scala
                        ├── QueueStream.scala
                        ├── ScoketStreaming.scala
                        ├── ScoketStreamingCheckPoint.scala
                        ├── StateFull.scala
                        └── WindowWordCount.scala


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | receivedBlockMetadata
3 | target
4 | .checkpoint*
5 | spark-warehouse


--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | BigDataLearning


--------------------------------------------------------------------------------
/.idea/compiler.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="CompilerConfiguration">
 4 |     <annotationProcessing>
 5 |       <profile name="Annotation profile for SparkStreaming" enabled="true">
 6 |         <sourceOutputDir name="target/generated-sources/annotations" />
 7 |         <sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
 8 |         <outputRelativeToContentRoot value="true" />
 9 |         <module name="SparkStreaming" />
10 |       </profile>
11 |       <profile name="Maven default annotation processors profile" enabled="true">
12 |         <sourceOutputDir name="target/generated-sources/annotations" />
13 |         <sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
14 |         <outputRelativeToContentRoot value="true" />
15 |         <module name="BigDataLearning" />
16 |         <module name="SparkExample" />
17 |       </profile>
18 |     </annotationProcessing>
19 |     <bytecodeTargetLevel>
20 |       <module name="BigDataLearning" target="1.7" />
21 |       <module name="SparkExample" target="1.7" />
22 |     </bytecodeTargetLevel>
23 |   </component>
24 | </project>


--------------------------------------------------------------------------------
/.idea/copyright/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="CopyrightManager">
2 |   <settings default="" />
3 | </component>


--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="Encoding">
4 |     <file url="file://$PROJECT_DIR$" charset="UTF-8" />
5 |     <file url="PROJECT" charset="UTF-8" />
6 |   </component>
7 | </project>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_clearspring_analytics_stream_2_7_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.clearspring.analytics:stream:2.7.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/clearspring/analytics/stream/2.7.0/stream-2.7.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/clearspring/analytics/stream/2.7.0/stream-2.7.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/clearspring/analytics/stream/2.7.0/stream-2.7.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.google.code.findbugs:jsr305:1.3.9">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_code_gson_gson_2_2_4.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.google.code.gson:gson:2.2.4">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/code/gson/gson/2.2.4/gson-2.2.4.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/code/gson/gson/2.2.4/gson-2.2.4-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/code/gson/gson/2.2.4/gson-2.2.4-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_google_protobuf_protobuf_java_2_5_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.google.protobuf:protobuf-java:2.5.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/google/protobuf/protobuf-java/2.5.0/protobuf-java-2.5.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_ning_compress_lzf_1_0_3.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.ning:compress-lzf:1.0.3">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/ning/compress-lzf/1.0.3/compress-lzf-1.0.3-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_sun_jersey_jersey_client_1_9.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.sun.jersey:jersey-client:1.9">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/jersey/jersey-client/1.9/jersey-client-1.9.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/jersey/jersey-client/1.9/jersey-client-1.9-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/jersey/jersey-client/1.9/jersey-client-1.9-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_9.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.sun.jersey:jersey-core:1.9">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/jersey/jersey-core/1.9/jersey-core-1.9.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/jersey/jersey-core/1.9/jersey-core-1.9-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/jersey/jersey-core/1.9/jersey-core-1.9-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_sun_xml_bind_jaxb_core_2_2_11.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.sun.xml.bind:jaxb-core:2.2.11">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/xml/bind/jaxb-core/2.2.11/jaxb-core-2.2.11.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/xml/bind/jaxb-core/2.2.11/jaxb-core-2.2.11-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/xml/bind/jaxb-core/2.2.11/jaxb-core-2.2.11-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_11.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: com.sun.xml.bind:jaxb-impl:2.2.11">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/xml/bind/jaxb-impl/2.2.11/jaxb-impl-2.2.11.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/xml/bind/jaxb-impl/2.2.11/jaxb-impl-2.2.11-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/com/sun/xml/bind/jaxb-impl/2.2.11/jaxb-impl-2.2.11-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-beanutils:commons-beanutils:1.7.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-beanutils/commons-beanutils/1.7.0/commons-beanutils-1.7.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-beanutils/commons-beanutils/1.7.0/commons-beanutils-1.7.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-beanutils/commons-beanutils/1.7.0/commons-beanutils-1.7.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-beanutils:commons-beanutils-core:1.8.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-beanutils/commons-beanutils-core/1.8.0/commons-beanutils-core-1.8.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-beanutils/commons-beanutils-core/1.8.0/commons-beanutils-core-1.8.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-beanutils/commons-beanutils-core/1.8.0/commons-beanutils-core-1.8.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-cli:commons-cli:1.2">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.2/commons-cli-1.2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.2/commons-cli-1.2-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-cli/commons-cli/1.2/commons-cli-1.2-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-configuration:commons-configuration:1.6">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-configuration/commons-configuration/1.6/commons-configuration-1.6.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-configuration/commons-configuration/1.6/commons-configuration-1.6-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-configuration/commons-configuration/1.6/commons-configuration-1.6-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-digester:commons-digester:1.8">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-digester/commons-digester/1.8/commons-digester-1.8.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-digester/commons-digester/1.8/commons-digester-1.8-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-digester/commons-digester/1.8/commons-digester-1.8-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_1.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-httpclient:commons-httpclient:3.1">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-httpclient/commons-httpclient/3.1/commons-httpclient-3.1-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_io_commons_io_2_4.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-io:commons-io:2.4">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-io/commons-io/2.4/commons-io-2.4.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-io/commons-io/2.4/commons-io-2.4-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-io/commons-io/2.4/commons-io-2.4-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_logging_commons_logging_1_1_3.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-logging:commons-logging:1.1.3">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-logging/commons-logging/1.1.3/commons-logging-1.1.3-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__commons_net_commons_net_2_2.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: commons-net:commons-net:2.2">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/commons-net/commons-net/2.2/commons-net-2.2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/commons-net/commons-net/2.2/commons-net-2.2-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/commons-net/commons-net/2.2/commons-net-2.2-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: io.dropwizard.metrics:metrics-core:3.1.2">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-core/3.1.2/metrics-core-3.1.2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-core/3.1.2/metrics-core-3.1.2-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-core/3.1.2/metrics-core-3.1.2-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: io.dropwizard.metrics:metrics-graphite:3.1.2">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-graphite/3.1.2/metrics-graphite-3.1.2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-graphite/3.1.2/metrics-graphite-3.1.2-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-graphite/3.1.2/metrics-graphite-3.1.2-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: io.dropwizard.metrics:metrics-json:3.1.2">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-json/3.1.2/metrics-json-3.1.2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-json/3.1.2/metrics-json-3.1.2-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-json/3.1.2/metrics-json-3.1.2-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__io_dropwizard_metrics_metrics_jvm_3_1_2.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: io.dropwizard.metrics:metrics-jvm:3.1.2">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-jvm/3.1.2/metrics-jvm-3.1.2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-jvm/3.1.2/metrics-jvm-3.1.2-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/io/dropwizard/metrics/metrics-jvm/3.1.2/metrics-jvm-3.1.2-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_servlet_servlet_api_2_5.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: javax.servlet:servlet-api:2.5">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/javax/servlet/servlet-api/2.5/servlet-api-2.5.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/javax/servlet/servlet-api/2.5/servlet-api-2.5-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/javax/servlet/servlet-api/2.5/servlet-api-2.5-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: javax.xml.bind:jaxb-api:2.2.2">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/javax/xml/bind/jaxb-api/2.2.2/jaxb-api-2.2.2-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: javax.xml.stream:stax-api:1.0-2">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/javax/xml/stream/stax-api/1.0-2/stax-api-1.0-2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/javax/xml/stream/stax-api/1.0-2/stax-api-1.0-2-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/javax/xml/stream/stax-api/1.0-2/stax-api-1.0-2-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__junit_junit_4_11.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: junit:junit:4.11">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/junit/junit/4.11/junit-4.11.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/junit/junit/4.11/junit-4.11-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/junit/junit/4.11/junit-4.11-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__log4j_log4j_1_2_17.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: log4j:log4j:1.2.17">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/log4j/log4j/1.2.17/log4j-1.2.17.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/log4j/log4j/1.2.17/log4j-1.2.17-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/log4j/log4j/1.2.17/log4j-1.2.17-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__net_jpountz_lz4_lz4_1_3_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: net.jpountz.lz4:lz4:1.3.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_ipc_1_7_7.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.avro:avro-ipc:1.7.7">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_ipc_tests_1_7_7.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.avro:avro-ipc:tests:1.7.7">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-tests.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-test-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-ipc/1.7.7/avro-ipc-1.7.7-test-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_avro_avro_mapred_hadoop2_1_7_7.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.avro:avro-mapred:hadoop2:1.7.7">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-mapred/1.7.7/avro-mapred-1.7.7-hadoop2.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-mapred/1.7.7/avro-mapred-1.7.7-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/avro/avro-mapred/1.7.7/avro-mapred-1.7.7-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_camel_camel_core_2_17_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.camel:camel-core:2.17.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/camel/camel-core/2.17.0/camel-core-2.17.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/camel/camel-core/2.17.0/camel-core-2.17.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/camel/camel-core/2.17.0/camel-core-2.17.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_camel_camel_test_2_17_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.camel:camel-test:2.17.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/camel/camel-test/2.17.0/camel-test-2.17.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/camel/camel-test/2.17.0/camel-test-2.17.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/camel/camel-test/2.17.0/camel-test-2.17.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_compress_1_4_1.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.commons:commons-compress:1.4.1">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-compress/1.4.1/commons-compress-1.4.1-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_commons_commons_math3_3_4_1.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.commons:commons-math3:3.4.1">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-math3/3.4.1/commons-math3-3.4.1.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-math3/3.4.1/commons-math3-3.4.1-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-math3/3.4.1/commons-math3-3.4.1-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_directory_api_api_asn1_api_1_0_0_M20.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.directory.api:api-asn1-api:1.0.0-M20">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/api/api-asn1-api/1.0.0-M20/api-asn1-api-1.0.0-M20.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/api/api-asn1-api/1.0.0-M20/api-asn1-api-1.0.0-M20-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/api/api-asn1-api/1.0.0-M20/api-asn1-api-1.0.0-M20-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_directory_api_api_util_1_0_0_M20.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.directory.api:api-util:1.0.0-M20">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/api/api-util/1.0.0-M20/api-util-1.0.0-M20.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/api/api-util/1.0.0-M20/api-util-1.0.0-M20-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/api/api-util/1.0.0-M20/api-util-1.0.0-M20-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_directory_server_apacheds_i18n_2_0_0_M15.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.directory.server:apacheds-i18n:2.0.0-M15">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/server/apacheds-i18n/2.0.0-M15/apacheds-i18n-2.0.0-M15.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/server/apacheds-i18n/2.0.0-M15/apacheds-i18n-2.0.0-M15-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/server/apacheds-i18n/2.0.0-M15/apacheds-i18n-2.0.0-M15-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_directory_server_apacheds_kerberos_codec_2_0_0_M15.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/server/apacheds-kerberos-codec/2.0.0-M15/apacheds-kerberos-codec-2.0.0-M15.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/server/apacheds-kerberos-codec/2.0.0-M15/apacheds-kerberos-codec-2.0.0-M15-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/directory/server/apacheds-kerberos-codec/2.0.0-M15/apacheds-kerberos-codec-2.0.0-M15-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_apache_ivy_ivy_2_4_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.apache.ivy:ivy:2.4.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/ivy/ivy/2.4.0/ivy-2.4.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/ivy/ivy/2.4.0/ivy-2.4.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/apache/ivy/ivy/2.4.0/ivy-2.4.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_9_13.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.codehaus.jackson:jackson-jaxrs:1.9.13">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/codehaus/jackson/jackson-jaxrs/1.9.13/jackson-jaxrs-1.9.13-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_9_13.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.codehaus.jackson:jackson-xc:1.9.13">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/codehaus/jackson/jackson-xc/1.9.13/jackson-xc-1.9.13-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.fusesource.leveldbjni:leveldbjni-all:1.8">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/fusesource/leveldbjni/leveldbjni-all/1.8/leveldbjni-all-1.8-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_hamcrest_hamcrest_core_1_3.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.hamcrest:hamcrest-core:1.3">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_mortbay_jetty_jetty_util_6_1_26.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.mortbay.jetty:jetty-util:6.1.26">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/mortbay/jetty/jetty-util/6.1.26/jetty-util-6.1.26.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/mortbay/jetty/jetty-util/6.1.26/jetty-util-6.1.26-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/mortbay/jetty/jetty-util/6.1.26/jetty-util-6.1.26-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_scala_lang_modules_scala_xml_2_11_1_0_4.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.4">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/scala-lang/modules/scala-xml_2.11/1.0.4/scala-xml_2.11-1.0.4.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/scala-lang/modules/scala-xml_2.11/1.0.4/scala-xml_2.11-1.0.4-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/scala-lang/modules/scala-xml_2.11/1.0.4/scala-xml_2.11-1.0.4-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_13.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.slf4j:slf4j-api:1.7.13">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.13/slf4j-api-1.7.13.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.13/slf4j-api-1.7.13-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.13/slf4j-api-1.7.13-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_slf4j_slf4j_log4j12_1_7_13.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.slf4j:slf4j-log4j12:1.7.13">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-log4j12/1.7.13/slf4j-log4j12-1.7.13.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-log4j12/1.7.13/slf4j-log4j12-1.7.13-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-log4j12/1.7.13/slf4j-log4j12-1.7.13-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_spark_project_spark_unused_1_0_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.spark-project.spark:unused:1.0.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/spark-project/spark/unused/1.0.0/unused-1.0.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/spark-project/spark/unused/1.0.0/unused-1.0.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/spark-project/spark/unused/1.0.0/unused-1.0.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__org_tukaani_xz_1_0.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: org.tukaani:xz:1.0">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/org/tukaani/xz/1.0/xz-1.0.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/org/tukaani/xz/1.0/xz-1.0-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/org/tukaani/xz/1.0/xz-1.0-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__oro_oro_2_0_8.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: oro:oro:2.0.8">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/oro/oro/2.0.8/oro-2.0.8.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/oro/oro/2.0.8/oro-2.0.8-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/oro/oro/2.0.8/oro-2.0.8-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__xerces_xercesImpl_2_9_1.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: xerces:xercesImpl:2.9.1">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/xerces/xercesImpl/2.9.1/xercesImpl-2.9.1.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/xerces/xercesImpl/2.9.1/xercesImpl-2.9.1-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/xerces/xercesImpl/2.9.1/xercesImpl-2.9.1-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__xml_apis_xml_apis_1_3_04.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: xml-apis:xml-apis:1.3.04">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/xml-apis/xml-apis/1.3.04/xml-apis-1.3.04.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/xml-apis/xml-apis/1.3.04/xml-apis-1.3.04-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/xml-apis/xml-apis/1.3.04/xml-apis-1.3.04-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/libraries/Maven__xmlenc_xmlenc_0_52.xml:
--------------------------------------------------------------------------------
 1 | <component name="libraryTable">
 2 |   <library name="Maven: xmlenc:xmlenc:0.52">
 3 |     <CLASSES>
 4 |       <root url="jar://$MAVEN_REPOSITORY$/xmlenc/xmlenc/0.52/xmlenc-0.52.jar!/" />
 5 |     </CLASSES>
 6 |     <JAVADOC>
 7 |       <root url="jar://$MAVEN_REPOSITORY$/xmlenc/xmlenc/0.52/xmlenc-0.52-javadoc.jar!/" />
 8 |     </JAVADOC>
 9 |     <SOURCES>
10 |       <root url="jar://$MAVEN_REPOSITORY$/xmlenc/xmlenc/0.52/xmlenc-0.52-sources.jar!/" />
11 |     </SOURCES>
12 |   </library>
13 | </component>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="MavenProjectsManager">
 4 |     <option name="originalFiles">
 5 |       <list>
 6 |         <option value="$PROJECT_DIR$/pom.xml" />
 7 |       </list>
 8 |     </option>
 9 |   </component>
10 |   <component name="ProjectRootManager" version="2" languageLevel="JDK_1_3" default="false" project-jdk-name="1.8" project-jdk-type="JavaSDK">
11 |     <output url="file://$PROJECT_DIR$/../BigDataLearning/out" />
12 |   </component>
13 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/BigDataLearning.iml" filepath="$PROJECT_DIR$/BigDataLearning.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/scala_compiler.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ScalaCompilerConfiguration">
4 |     <profile name="Maven 1" modules="SparkStreaming,SparkExample,BigDataLearning" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/.idea/uiDesigner.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <project version="4">
  3 |   <component name="Palette2">
  4 |     <group name="Swing">
  5 |       <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.png" removable="false" auto-create-binding="false" can-attach-label="false">
  6 |         <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
  7 |       </item>
  8 |       <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.png" removable="false" auto-create-binding="false" can-attach-label="false">
  9 |         <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
 10 |       </item>
 11 |       <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.png" removable="false" auto-create-binding="false" can-attach-label="false">
 12 |         <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
 13 |       </item>
 14 |       <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.png" removable="false" auto-create-binding="false" can-attach-label="true">
 15 |         <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
 16 |       </item>
 17 |       <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.png" removable="false" auto-create-binding="true" can-attach-label="false">
 18 |         <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
 19 |         <initial-values>
 20 |           <property name="text" value="Button" />
 21 |         </initial-values>
 22 |       </item>
 23 |       <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.png" removable="false" auto-create-binding="true" can-attach-label="false">
 24 |         <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
 25 |         <initial-values>
 26 |           <property name="text" value="RadioButton" />
 27 |         </initial-values>
 28 |       </item>
 29 |       <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.png" removable="false" auto-create-binding="true" can-attach-label="false">
 30 |         <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
 31 |         <initial-values>
 32 |           <property name="text" value="CheckBox" />
 33 |         </initial-values>
 34 |       </item>
 35 |       <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.png" removable="false" auto-create-binding="false" can-attach-label="false">
 36 |         <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
 37 |         <initial-values>
 38 |           <property name="text" value="Label" />
 39 |         </initial-values>
 40 |       </item>
 41 |       <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.png" removable="false" auto-create-binding="true" can-attach-label="true">
 42 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
 43 |           <preferred-size width="150" height="-1" />
 44 |         </default-constraints>
 45 |       </item>
 46 |       <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.png" removable="false" auto-create-binding="true" can-attach-label="true">
 47 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
 48 |           <preferred-size width="150" height="-1" />
 49 |         </default-constraints>
 50 |       </item>
 51 |       <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.png" removable="false" auto-create-binding="true" can-attach-label="true">
 52 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
 53 |           <preferred-size width="150" height="-1" />
 54 |         </default-constraints>
 55 |       </item>
 56 |       <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.png" removable="false" auto-create-binding="true" can-attach-label="true">
 57 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 58 |           <preferred-size width="150" height="50" />
 59 |         </default-constraints>
 60 |       </item>
 61 |       <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.png" removable="false" auto-create-binding="true" can-attach-label="true">
 62 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 63 |           <preferred-size width="150" height="50" />
 64 |         </default-constraints>
 65 |       </item>
 66 |       <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.png" removable="false" auto-create-binding="true" can-attach-label="true">
 67 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 68 |           <preferred-size width="150" height="50" />
 69 |         </default-constraints>
 70 |       </item>
 71 |       <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.png" removable="false" auto-create-binding="true" can-attach-label="true">
 72 |         <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
 73 |       </item>
 74 |       <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.png" removable="false" auto-create-binding="true" can-attach-label="false">
 75 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 76 |           <preferred-size width="150" height="50" />
 77 |         </default-constraints>
 78 |       </item>
 79 |       <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.png" removable="false" auto-create-binding="true" can-attach-label="false">
 80 |         <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
 81 |           <preferred-size width="150" height="50" />
 82 |         </default-constraints>
 83 |       </item>
 84 |       <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.png" removable="false" auto-create-binding="true" can-attach-label="false">
 85 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
 86 |           <preferred-size width="150" height="50" />
 87 |         </default-constraints>
 88 |       </item>
 89 |       <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.png" removable="false" auto-create-binding="true" can-attach-label="false">
 90 |         <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
 91 |           <preferred-size width="200" height="200" />
 92 |         </default-constraints>
 93 |       </item>
 94 |       <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.png" removable="false" auto-create-binding="false" can-attach-label="false">
 95 |         <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
 96 |           <preferred-size width="200" height="200" />
 97 |         </default-constraints>
 98 |       </item>
 99 |       <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.png" removable="false" auto-create-binding="true" can-attach-label="true">
100 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
101 |       </item>
102 |       <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.png" removable="false" auto-create-binding="true" can-attach-label="false">
103 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
104 |       </item>
105 |       <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.png" removable="false" auto-create-binding="false" can-attach-label="false">
106 |         <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
107 |       </item>
108 |       <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.png" removable="false" auto-create-binding="true" can-attach-label="false">
109 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
110 |       </item>
111 |       <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.png" removable="false" auto-create-binding="false" can-attach-label="false">
112 |         <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
113 |           <preferred-size width="-1" height="20" />
114 |         </default-constraints>
115 |       </item>
116 |       <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.png" removable="false" auto-create-binding="false" can-attach-label="false">
117 |         <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
118 |       </item>
119 |       <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.png" removable="false" auto-create-binding="true" can-attach-label="false">
120 |         <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
121 |       </item>
122 |     </group>
123 |   </component>
124 | </project>


--------------------------------------------------------------------------------
/BigDataLearning.iml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
  3 |   <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_7">
  4 |     <output url="file://$MODULE_DIR$/target/classes" />
  5 |     <output-test url="file://$MODULE_DIR$/target/test-classes" />
  6 |     <content url="file://$MODULE_DIR$">
  7 |       <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
  8 |       <sourceFolder url="file://$MODULE_DIR$/src/main/scala" isTestSource="false" />
  9 |       <sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
 10 |       <sourceFolder url="file://$MODULE_DIR$/src/test/scala" isTestSource="true" />
 11 |       <excludeFolder url="file://$MODULE_DIR$/target" />
 12 |     </content>
 13 |     <orderEntry type="inheritedJdk" />
 14 |     <orderEntry type="sourceFolder" forTests="false" />
 15 |     <orderEntry type="library" name="scala-sdk-2.11.8" level="application" />
 16 |     <orderEntry type="library" name="Maven: org.apache.kafka:kafka_2.12:2.0.0" level="project" />
 17 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.9.6" level="project" />
 18 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.9.0" level="project" />
 19 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.9.6" level="project" />
 20 |     <orderEntry type="library" name="Maven: net.sf.jopt-simple:jopt-simple:5.0.4" level="project" />
 21 |     <orderEntry type="library" name="Maven: com.yammer.metrics:metrics-core:2.2.0" level="project" />
 22 |     <orderEntry type="library" name="Maven: org.scala-lang:scala-reflect:2.12.6" level="project" />
 23 |     <orderEntry type="library" name="Maven: com.typesafe.scala-logging:scala-logging_2.12:3.9.0" level="project" />
 24 |     <orderEntry type="library" name="Maven: com.101tec:zkclient:0.10" level="project" />
 25 |     <orderEntry type="library" name="Maven: org.apache.zookeeper:zookeeper:3.4.13" level="project" />
 26 |     <orderEntry type="library" name="Maven: org.apache.yetus:audience-annotations:0.5.0" level="project" />
 27 |     <orderEntry type="library" name="Maven: org.apache.kafka:kafka-clients:2.0.0" level="project" />
 28 |     <orderEntry type="library" name="Maven: org.lz4:lz4-java:1.4.1" level="project" />
 29 |     <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.1.7.1" level="project" />
 30 |     <orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.47" level="project" />
 31 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-sql_2.11:2.2.2" level="project" />
 32 |     <orderEntry type="library" name="Maven: com.univocity:univocity-parsers:2.2.1" level="project" />
 33 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-sketch_2.11:2.2.2" level="project" />
 34 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-catalyst_2.11:2.2.2" level="project" />
 35 |     <orderEntry type="library" name="Maven: org.codehaus.janino:janino:3.0.8" level="project" />
 36 |     <orderEntry type="library" name="Maven: org.codehaus.janino:commons-compiler:3.0.8" level="project" />
 37 |     <orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.5.3" level="project" />
 38 |     <orderEntry type="library" name="Maven: commons-codec:commons-codec:1.10" level="project" />
 39 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-tags_2.11:2.2.2" level="project" />
 40 |     <orderEntry type="library" name="Maven: org.apache.parquet:parquet-column:1.8.2" level="project" />
 41 |     <orderEntry type="library" name="Maven: org.apache.parquet:parquet-common:1.8.2" level="project" />
 42 |     <orderEntry type="library" name="Maven: org.apache.parquet:parquet-encoding:1.8.2" level="project" />
 43 |     <orderEntry type="library" name="Maven: org.apache.parquet:parquet-hadoop:1.8.2" level="project" />
 44 |     <orderEntry type="library" name="Maven: org.apache.parquet:parquet-format:2.3.1" level="project" />
 45 |     <orderEntry type="library" name="Maven: org.apache.parquet:parquet-jackson:1.8.2" level="project" />
 46 |     <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-mapper-asl:1.9.11" level="project" />
 47 |     <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-core-asl:1.9.11" level="project" />
 48 |     <orderEntry type="library" name="Maven: org.apache.xbean:xbean-asm5-shaded:4.4" level="project" />
 49 |     <orderEntry type="library" name="Maven: org.spark-project.spark:unused:1.0.0" level="project" />
 50 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-core_2.11:2.2.2" level="project" />
 51 |     <orderEntry type="library" name="Maven: org.apache.avro:avro:1.7.7" level="project" />
 52 |     <orderEntry type="library" name="Maven: com.thoughtworks.paranamer:paranamer:2.3" level="project" />
 53 |     <orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.4.1" level="project" />
 54 |     <orderEntry type="library" name="Maven: org.tukaani:xz:1.0" level="project" />
 55 |     <orderEntry type="library" name="Maven: org.apache.avro:avro-mapred:hadoop2:1.7.7" level="project" />
 56 |     <orderEntry type="library" name="Maven: org.apache.avro:avro-ipc:1.7.7" level="project" />
 57 |     <orderEntry type="library" name="Maven: org.apache.avro:avro-ipc:tests:1.7.7" level="project" />
 58 |     <orderEntry type="library" name="Maven: com.twitter:chill_2.11:0.8.0" level="project" />
 59 |     <orderEntry type="library" name="Maven: com.esotericsoftware:kryo-shaded:3.0.3" level="project" />
 60 |     <orderEntry type="library" name="Maven: com.esotericsoftware:minlog:1.3.0" level="project" />
 61 |     <orderEntry type="library" name="Maven: org.objenesis:objenesis:2.1" level="project" />
 62 |     <orderEntry type="library" name="Maven: com.twitter:chill-java:0.8.0" level="project" />
 63 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-launcher_2.11:2.2.2" level="project" />
 64 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-network-common_2.11:2.2.2" level="project" />
 65 |     <orderEntry type="library" name="Maven: org.fusesource.leveldbjni:leveldbjni-all:1.8" level="project" />
 66 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-network-shuffle_2.11:2.2.2" level="project" />
 67 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-unsafe_2.11:2.2.2" level="project" />
 68 |     <orderEntry type="library" name="Maven: net.java.dev.jets3t:jets3t:0.9.3" level="project" />
 69 |     <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.3.3" level="project" />
 70 |     <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.3.6" level="project" />
 71 |     <orderEntry type="library" name="Maven: javax.activation:activation:1.1.1" level="project" />
 72 |     <orderEntry type="library" name="Maven: mx4j:mx4j:3.0.2" level="project" />
 73 |     <orderEntry type="library" name="Maven: javax.mail:mail:1.4.7" level="project" />
 74 |     <orderEntry type="library" name="Maven: org.bouncycastle:bcprov-jdk15on:1.51" level="project" />
 75 |     <orderEntry type="library" name="Maven: com.jamesmurty.utils:java-xmlbuilder:1.0" level="project" />
 76 |     <orderEntry type="library" name="Maven: net.iharder:base64:2.3.8" level="project" />
 77 |     <orderEntry type="library" name="Maven: org.apache.curator:curator-recipes:2.6.0" level="project" />
 78 |     <orderEntry type="library" name="Maven: org.apache.curator:curator-framework:2.6.0" level="project" />
 79 |     <orderEntry type="library" name="Maven: com.google.guava:guava:16.0.1" level="project" />
 80 |     <orderEntry type="library" name="Maven: javax.servlet:javax.servlet-api:3.1.0" level="project" />
 81 |     <orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.5" level="project" />
 82 |     <orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.4.1" level="project" />
 83 |     <orderEntry type="library" name="Maven: com.google.code.findbugs:jsr305:1.3.9" level="project" />
 84 |     <orderEntry type="library" name="Maven: org.slf4j:jul-to-slf4j:1.7.16" level="project" />
 85 |     <orderEntry type="library" name="Maven: org.slf4j:jcl-over-slf4j:1.7.16" level="project" />
 86 |     <orderEntry type="library" name="Maven: com.ning:compress-lzf:1.0.3" level="project" />
 87 |     <orderEntry type="library" name="Maven: net.jpountz.lz4:lz4:1.3.0" level="project" />
 88 |     <orderEntry type="library" name="Maven: org.roaringbitmap:RoaringBitmap:0.5.11" level="project" />
 89 |     <orderEntry type="library" name="Maven: commons-net:commons-net:2.2" level="project" />
 90 |     <orderEntry type="library" name="Maven: org.json4s:json4s-jackson_2.11:3.2.11" level="project" />
 91 |     <orderEntry type="library" name="Maven: org.json4s:json4s-core_2.11:3.2.11" level="project" />
 92 |     <orderEntry type="library" name="Maven: org.json4s:json4s-ast_2.11:3.2.11" level="project" />
 93 |     <orderEntry type="library" name="Maven: org.scala-lang:scalap:2.11.0" level="project" />
 94 |     <orderEntry type="library" name="Maven: org.scala-lang:scala-compiler:2.11.0" level="project" />
 95 |     <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-parser-combinators_2.11:1.0.1" level="project" />
 96 |     <orderEntry type="library" name="Maven: org.glassfish.jersey.core:jersey-client:2.22.2" level="project" />
 97 |     <orderEntry type="library" name="Maven: javax.ws.rs:javax.ws.rs-api:2.0.1" level="project" />
 98 |     <orderEntry type="library" name="Maven: org.glassfish.hk2:hk2-api:2.4.0-b34" level="project" />
 99 |     <orderEntry type="library" name="Maven: org.glassfish.hk2:hk2-utils:2.4.0-b34" level="project" />
100 |     <orderEntry type="library" name="Maven: org.glassfish.hk2.external:aopalliance-repackaged:2.4.0-b34" level="project" />
101 |     <orderEntry type="library" name="Maven: org.glassfish.hk2.external:javax.inject:2.4.0-b34" level="project" />
102 |     <orderEntry type="library" name="Maven: org.glassfish.hk2:hk2-locator:2.4.0-b34" level="project" />
103 |     <orderEntry type="library" name="Maven: org.javassist:javassist:3.18.1-GA" level="project" />
104 |     <orderEntry type="library" name="Maven: org.glassfish.jersey.core:jersey-common:2.22.2" level="project" />
105 |     <orderEntry type="library" name="Maven: javax.annotation:javax.annotation-api:1.2" level="project" />
106 |     <orderEntry type="library" name="Maven: org.glassfish.jersey.bundles.repackaged:jersey-guava:2.22.2" level="project" />
107 |     <orderEntry type="library" name="Maven: org.glassfish.hk2:osgi-resource-locator:1.0.1" level="project" />
108 |     <orderEntry type="library" name="Maven: org.glassfish.jersey.core:jersey-server:2.22.2" level="project" />
109 |     <orderEntry type="library" name="Maven: org.glassfish.jersey.media:jersey-media-jaxb:2.22.2" level="project" />
110 |     <orderEntry type="library" name="Maven: javax.validation:validation-api:1.1.0.Final" level="project" />
111 |     <orderEntry type="library" name="Maven: org.glassfish.jersey.containers:jersey-container-servlet:2.22.2" level="project" />
112 |     <orderEntry type="library" name="Maven: org.glassfish.jersey.containers:jersey-container-servlet-core:2.22.2" level="project" />
113 |     <orderEntry type="library" name="Maven: io.netty:netty-all:4.0.43.Final" level="project" />
114 |     <orderEntry type="library" name="Maven: io.netty:netty:3.9.9.Final" level="project" />
115 |     <orderEntry type="library" name="Maven: com.clearspring.analytics:stream:2.7.0" level="project" />
116 |     <orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-core:3.1.2" level="project" />
117 |     <orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-jvm:3.1.2" level="project" />
118 |     <orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-json:3.1.2" level="project" />
119 |     <orderEntry type="library" name="Maven: io.dropwizard.metrics:metrics-graphite:3.1.2" level="project" />
120 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.module:jackson-module-scala_2.11:2.6.5" level="project" />
121 |     <orderEntry type="library" name="Maven: com.fasterxml.jackson.module:jackson-module-paranamer:2.6.5" level="project" />
122 |     <orderEntry type="library" name="Maven: org.apache.ivy:ivy:2.4.0" level="project" />
123 |     <orderEntry type="library" name="Maven: oro:oro:2.0.8" level="project" />
124 |     <orderEntry type="library" name="Maven: net.razorvine:pyrolite:4.13" level="project" />
125 |     <orderEntry type="library" name="Maven: net.sf.py4j:py4j:0.10.7" level="project" />
126 |     <orderEntry type="library" name="Maven: org.apache.commons:commons-crypto:1.0.0" level="project" />
127 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-client:2.7.5" level="project" />
128 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-common:2.7.5" level="project" />
129 |     <orderEntry type="library" name="Maven: commons-cli:commons-cli:1.2" level="project" />
130 |     <orderEntry type="library" name="Maven: xmlenc:xmlenc:0.52" level="project" />
131 |     <orderEntry type="library" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" />
132 |     <orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
133 |     <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.2" level="project" />
134 |     <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-sslengine:6.1.26" level="project" />
135 |     <orderEntry type="library" scope="RUNTIME" name="Maven: javax.servlet.jsp:jsp-api:2.1" level="project" />
136 |     <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.3" level="project" />
137 |     <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" />
138 |     <orderEntry type="library" name="Maven: commons-configuration:commons-configuration:1.6" level="project" />
139 |     <orderEntry type="library" name="Maven: commons-digester:commons-digester:1.8" level="project" />
140 |     <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.7.0" level="project" />
141 |     <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils-core:1.8.0" level="project" />
142 |     <orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:2.5.0" level="project" />
143 |     <orderEntry type="library" name="Maven: com.google.code.gson:gson:2.2.4" level="project" />
144 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-auth:2.7.5" level="project" />
145 |     <orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15" level="project" />
146 |     <orderEntry type="library" name="Maven: org.apache.directory.server:apacheds-i18n:2.0.0-M15" level="project" />
147 |     <orderEntry type="library" name="Maven: org.apache.directory.api:api-asn1-api:1.0.0-M20" level="project" />
148 |     <orderEntry type="library" name="Maven: org.apache.directory.api:api-util:1.0.0-M20" level="project" />
149 |     <orderEntry type="library" name="Maven: org.apache.curator:curator-client:2.7.1" level="project" />
150 |     <orderEntry type="library" name="Maven: org.apache.htrace:htrace-core:3.1.0-incubating" level="project" />
151 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-hdfs:2.7.5" level="project" />
152 |     <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-util:6.1.26" level="project" />
153 |     <orderEntry type="library" name="Maven: xerces:xercesImpl:2.9.1" level="project" />
154 |     <orderEntry type="library" name="Maven: xml-apis:xml-apis:1.3.04" level="project" />
155 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-app:2.7.5" level="project" />
156 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-common:2.7.5" level="project" />
157 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-client:2.7.5" level="project" />
158 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-server-common:2.7.5" level="project" />
159 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-shuffle:2.7.5" level="project" />
160 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-api:2.7.5" level="project" />
161 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-core:2.7.5" level="project" />
162 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-yarn-common:2.7.5" level="project" />
163 |     <orderEntry type="library" name="Maven: javax.xml.bind:jaxb-api:2.2.2" level="project" />
164 |     <orderEntry type="library" name="Maven: javax.xml.stream:stax-api:1.0-2" level="project" />
165 |     <orderEntry type="library" name="Maven: javax.servlet:servlet-api:2.5" level="project" />
166 |     <orderEntry type="library" name="Maven: com.sun.jersey:jersey-core:1.9" level="project" />
167 |     <orderEntry type="library" name="Maven: com.sun.jersey:jersey-client:1.9" level="project" />
168 |     <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-jaxrs:1.9.13" level="project" />
169 |     <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-xc:1.9.13" level="project" />
170 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.7.5" level="project" />
171 |     <orderEntry type="library" name="Maven: org.apache.hadoop:hadoop-annotations:2.7.5" level="project" />
172 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-streaming_2.11:2.2.2" level="project" />
173 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-streaming-flume_2.11:2.3.1" level="project" />
174 |     <orderEntry type="library" name="Maven: org.apache.spark:spark-streaming-flume-sink_2.11:2.3.1" level="project" />
175 |     <orderEntry type="library" name="Maven: org.apache.flume:flume-ng-core:1.6.0" level="project" />
176 |     <orderEntry type="library" name="Maven: org.apache.flume:flume-ng-configuration:1.6.0" level="project" />
177 |     <orderEntry type="library" name="Maven: joda-time:joda-time:2.1" level="project" />
178 |     <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty:6.1.26" level="project" />
179 |     <orderEntry type="library" name="Maven: org.apache.mina:mina-core:2.0.4" level="project" />
180 |     <orderEntry type="library" name="Maven: org.apache.flume:flume-ng-sdk:1.6.0" level="project" />
181 |     <orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.11.8" level="project" />
182 |     <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.4" level="project" />
183 |     <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.13" level="project" />
184 |     <orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.13" level="project" />
185 |     <orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
186 |     <orderEntry type="library" scope="TEST" name="Maven: org.apache.camel:camel-test:2.17.0" level="project" />
187 |     <orderEntry type="library" scope="TEST" name="Maven: org.apache.camel:camel-core:2.17.0" level="project" />
188 |     <orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.11" level="project" />
189 |     <orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
190 |     <orderEntry type="library" scope="TEST" name="Maven: com.sun.xml.bind:jaxb-core:2.2.11" level="project" />
191 |     <orderEntry type="library" scope="TEST" name="Maven: com.sun.xml.bind:jaxb-impl:2.2.11" level="project" />
192 |   </component>
193 | </module>


--------------------------------------------------------------------------------
/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Main-Class: com.mobin.sparkStreaming.GenerateChar
3 | 
4 | 


--------------------------------------------------------------------------------
/RandomPrefix.txt:
--------------------------------------------------------------------------------
1 | Hello
2 | Hello
3 | HI
4 | HI


--------------------------------------------------------------------------------
/SampleJoin1.txt:
--------------------------------------------------------------------------------
 1 | 1,a1
 2 | 1,a2
 3 | 1,a3
 4 | 1,a4
 5 | 1,a5
 6 | 1,a6
 7 | 1,a7
 8 | 1,a8
 9 | 1,a9
10 | 1,a10
11 | 1,a11
12 | 1,a12
13 | 1,a13
14 | 1,a14
15 | 1,a15
16 | 1,a16
17 | 1,a17
18 | 1,a18
19 | 1,a19
20 | 1,a20
21 | 1,a21
22 | 1,a22
23 | 2,b
24 | 2,b


--------------------------------------------------------------------------------
/SampleJoin2.txt:
--------------------------------------------------------------------------------
1 | 1,a
2 | 2,b
3 | 3,c


--------------------------------------------------------------------------------
/mapjoin.txt:
--------------------------------------------------------------------------------
1 | 1,2,3
2 | 2,4,5


--------------------------------------------------------------------------------
/mapjoin1.txt:
--------------------------------------------------------------------------------
1 | 1,A,B
2 | 2,C,D


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  3 | 
  4 |   <modelVersion>4.0.0</modelVersion>
  5 | 
  6 |   <groupId>com.mobin</groupId>
  7 |   <artifactId>BigDataLearning</artifactId>
  8 |   <packaging>jar</packaging>
  9 |   <version>1.0-SNAPSHOT</version>
 10 | 
 11 |   <name>A Camel Scala Route</name>
 12 |   <url>http://www.myorganization.org</url>
 13 | 
 14 |   <properties>
 15 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 16 |     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
 17 |   </properties>
 18 | 
 19 |   <dependencies>
 20 | 
 21 | 
 22 |     <dependency>
 23 |       <groupId>org.apache.kafka</groupId>
 24 |       <artifactId>kafka_2.12</artifactId>
 25 |       <version>2.0.0</version>
 26 |     </dependency>
 27 | 
 28 |     <!-- https://mvnrepository.com/artifact/org.apache.kafka/kafka-clients -->
 29 |     <dependency>
 30 |     <groupId>org.apache.kafka</groupId>
 31 |     <artifactId>kafka-clients</artifactId>
 32 |     <version>2.0.0</version>
 33 |     </dependency>
 34 | 
 35 |     <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
 36 |     <dependency>
 37 |       <groupId>com.alibaba</groupId>
 38 |       <artifactId>fastjson</artifactId>
 39 |       <version>1.2.47</version>
 40 |     </dependency>
 41 | 
 42 |     <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
 43 |     <dependency>
 44 |       <groupId>org.apache.spark</groupId>
 45 |       <artifactId>spark-sql_2.11</artifactId>
 46 |       <version>2.2.2</version>
 47 |     </dependency>
 48 |     <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
 49 |     <dependency>
 50 |       <groupId>org.apache.spark</groupId>
 51 |       <artifactId>spark-core_2.11</artifactId>
 52 |       <version>2.2.2</version>
 53 |     </dependency>
 54 | 
 55 |     <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
 56 |     <dependency>
 57 |       <groupId>org.apache.hadoop</groupId>
 58 |       <artifactId>hadoop-client</artifactId>
 59 |       <version>2.7.5</version>
 60 |     </dependency>
 61 | 
 62 | 
 63 |     <dependency>
 64 |       <groupId>org.apache.spark</groupId>
 65 |       <artifactId>spark-streaming_2.11</artifactId>
 66 |       <version>2.2.2</version>
 67 |     </dependency>
 68 | 
 69 |     <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming-flume -->
 70 |     <dependency>
 71 |       <groupId>org.apache.spark</groupId>
 72 |       <artifactId>spark-streaming-flume_2.11</artifactId>
 73 |       <version>2.3.1</version>
 74 |     </dependency>
 75 | 
 76 | 
 77 |     <!-- scala -->
 78 |     <dependency>
 79 |       <groupId>org.scala-lang</groupId>
 80 |       <artifactId>scala-library</artifactId>
 81 |       <version>2.11.8</version>
 82 |     </dependency>
 83 |     <dependency>
 84 |       <groupId>org.scala-lang.modules</groupId>
 85 |       <artifactId>scala-xml_2.11</artifactId>
 86 |       <version>1.0.4</version>
 87 |     </dependency>
 88 | 
 89 |     <!-- logging -->
 90 |     <dependency>
 91 |       <groupId>org.slf4j</groupId>
 92 |       <artifactId>slf4j-api</artifactId>
 93 |       <version>1.7.13</version>
 94 |     </dependency>
 95 |     <dependency>
 96 |       <groupId>org.slf4j</groupId>
 97 |       <artifactId>slf4j-log4j12</artifactId>
 98 |       <version>1.7.13</version>
 99 |     </dependency>
100 |     <dependency>
101 |       <groupId>log4j</groupId>
102 |       <artifactId>log4j</artifactId>
103 |       <version>1.2.17</version>
104 |     </dependency>
105 | 
106 |     <!-- testing -->
107 |     <dependency>
108 |       <groupId>org.apache.camel</groupId>
109 |       <artifactId>camel-test</artifactId>
110 |       <version>2.17.0</version>
111 |       <scope>test</scope>
112 |     </dependency>
113 |   </dependencies>
114 | 
115 |   <build>
116 |     <defaultGoal>install</defaultGoal>
117 |     <sourceDirectory>src/main/scala</sourceDirectory>
118 |     <testSourceDirectory>src/test/scala</testSourceDirectory>
119 | 
120 |     <plugins>
121 | 
122 |       <!-- the Maven compiler plugin will compile Java source files -->
123 |       <plugin>
124 |         <groupId>org.apache.maven.plugins</groupId>
125 |         <artifactId>maven-compiler-plugin</artifactId>
126 |         <version>3.5.1</version>
127 |         <configuration>
128 |           <source>1.7</source>
129 |           <target>1.7</target>
130 |         </configuration>
131 |       </plugin>
132 |       <plugin>
133 |         <groupId>org.apache.maven.plugins</groupId>
134 |         <artifactId>maven-resources-plugin</artifactId>
135 |         <version>2.6</version>
136 |         <configuration>
137 |           <encoding>UTF-8</encoding>
138 |         </configuration>
139 |       </plugin>
140 | 
141 |       <!-- the Maven Scala plugin will compile Scala source files -->
142 |       <plugin>
143 |         <groupId>net.alchim31.maven</groupId>
144 |         <artifactId>scala-maven-plugin</artifactId>
145 |         <version>3.2.2</version>
146 |         <executions>
147 |           <execution>
148 |             <goals>
149 |               <goal>compile</goal>
150 |               <goal>testCompile</goal>
151 |             </goals>
152 |           </execution>
153 |         </executions>
154 |       </plugin>
155 | 
156 |       <!-- configure the eclipse plugin to generate eclipse project descriptors for a Scala project -->
157 |       <plugin>
158 |         <groupId>org.apache.maven.plugins</groupId>
159 |         <artifactId>maven-eclipse-plugin</artifactId>
160 |         <version>2.10</version>
161 |         <configuration>
162 |           <projectnatures>
163 |             <projectnature>org.scala-ide.sdt.core.scalanature</projectnature>
164 |             <projectnature>org.eclipse.jdt.core.javanature</projectnature>
165 |           </projectnatures>
166 |           <buildcommands>
167 |             <buildcommand>org.scala-ide.sdt.core.scalabuilder</buildcommand>
168 |           </buildcommands>
169 |           <classpathContainers>
170 |             <classpathContainer>org.scala-ide.sdt.launching.SCALA_CONTAINER</classpathContainer>
171 |             <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
172 |           </classpathContainers>
173 |           <excludes>
174 |             <exclude>org.scala-lang:scala-library</exclude>
175 |             <exclude>org.scala-lang:scala-compiler</exclude>
176 |           </excludes>
177 |           <sourceIncludes>
178 |             <sourceInclude>**/*.scala</sourceInclude>
179 |             <sourceInclude>**/*.java</sourceInclude>
180 |           </sourceIncludes>
181 |         </configuration>
182 |       </plugin>
183 | 
184 |       <plugin>
185 |        <groupId>org.apache.maven.plugins</groupId>
186 |        <artifactId>maven-assembly-plugin</artifactId>
187 |        <version>2.5.5</version>
188 |        <configuration>
189 |         <descriptorRefs>
190 |           <descriptorRef>jar-with-dependencies</descriptorRef>
191 |         </descriptorRefs>
192 |       </configuration>
193 |     </plugin>
194 | 
195 |     </plugins>
196 |   </build>
197 | 
198 | </project>
199 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | BigData Framework learning examples


--------------------------------------------------------------------------------
/src/main/resources/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Main-Class: com.mobin.sparkStreaming.com.mobin.sparkStreaming.FileStre
3 |  aming
4 | 
5 | 


--------------------------------------------------------------------------------
/src/main/resources/core-site.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | 
3 | <configuration>
4 |   <property>
5 |     <name>io.compression.codecs</name>
6 |     <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.DeflateCodec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
7 |   </property>
8 | </configuration>
9 | 


--------------------------------------------------------------------------------
/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
 1 | #
 2 | # The logging properties used
 3 | #
 4 | log4j.rootLogger=INFO, out
 5 | 
 6 | # uncomment the following line to turn on Camel debugging
 7 | #log4j.logger.org.apache.camel=DEBUG
 8 | 
 9 | log4j.logger.org.springframework=WARN
10 | 
11 | # CONSOLE appender not used by default
12 | log4j.appender.out=org.apache.log4j.ConsoleAppender
13 | log4j.appender.out.layout=org.apache.log4j.PatternLayout
14 | log4j.appender.out.layout.ConversionPattern=[%30.30t] %-30.30c{1} %-5p %m%n
15 | #log4j.appender.out.layout.ConversionPattern=%d [%-15.15t] %-5p %-30.30c{1} - %m%n
16 | 
17 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Advanced_Analytics_with_Spark/NaStatCounter.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Advanced_Analytics_with_Spark
 2 | 
 3 | import org.apache.spark.util.StatCounter
 4 | /**
 5 |   * Created by Mobin on 2017/3/8.
 6 |   */
 7 | class NaStatCounter extends  Serializable{
 8 | 
 9 |   val  stats: StatCounter = new StatCounter()
10 |   var missing: Long = 0
11 | 
12 |   def add(x: Double): NaStatCounter = {
13 |     if(java.lang.Double.isNaN(x)){
14 |       missing += 1
15 |     } else {
16 |       stats.merge(x)
17 |     }
18 |     this
19 |   }
20 | 
21 |   def merge(other: NaStatCounter): NaStatCounter = {
22 |     stats.merge(other.stats)
23 |     missing += other.missing
24 |     this
25 |   }
26 | 
27 |   override def toString = {
28 |     "stats: " + stats.toString()  + "NaN: " + missing
29 |   }
30 | }
31 | 
32 | object NaStatCounter extends  Serializable{
33 |   def apply(x: Double) = new NaStatCounter().add(x )
34 | }
35 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Advanced_Analytics_with_Spark/Patient.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Advanced_Analytics_with_Spark
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by Mobin on 2017/3/7.
 7 |   */
 8 | case class MatchData(id1: Int, id2: Int, scores: Array[Double], matched: Boolean)
 9 | object Patient {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("Patient")
13 |     val sc = new SparkContext(conf)
14 |     val rawblocks = sc.textFile(args(0))
15 |     val mds = rawblocks.filter(!isHeader(_)).map(pares)
16 |    // val grouped = mds.groupBy(x => x.matched).mapValues(x => x.size).foreach(println)  //按matched分组统计
17 |    // val sort = mds.map(x => x.matched).sortBy(_).foreach(println)
18 |     val nsdRDD = mds.map(md =>
19 |      md.scores.map(d => NaStatCounter(d))
20 |    ).foreach(x => println(x(1)))
21 |   }
22 | 
23 |   def isHeader(line: String): Boolean = {
24 |     line.contains("id_1")
25 |   }
26 | 
27 |   def toDouble(s: String): Double = {
28 |     if ("?".equals(s))
29 |       Double.NaN
30 |     else
31 |       s.toDouble
32 |   }
33 | 
34 |   def  pares(line: String)={
35 |     val pieces = line.split(",")
36 |     val id1 = pieces(0).toInt
37 |     val id2 = pieces(1).toInt
38 |     val scores = pieces.slice(2,11).map(toDouble)  //取数据的[2,11)位并转化成Double类型
39 |     val matched = pieces(11).toBoolean
40 |     MatchData(id1, id2, scores, matched)
41 |   }
42 | }
43 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/AudienceAnalysis.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Example
 2 | 
 3 | /**
 4 |   * Created by Mobin on 2016/11/15.
 5 |   */
 6 | object AudienceAnalysis {
 7 | 
 8 |   lazy val nameIndexMap = {
 9 |     val nameIndexMap = scala.collection.mutable.HashMap.empty[String, Int]
10 |     val basicNames = Seq("first_name", "last_name", "email", "company", "job", "street_address", "city",
11 |     "state_abbr", "zipcode_plus4", "url", "phoen_number", "user_agent", "user_name")
12 |     nameIndexMap ++= basicNames zip (0 to 12)
13 |     for(i <- 0 to 328){
14 |       nameIndexMap ++= Seq(("letter_" + i, i * 3 + 13),("number_" + i, i * 3 +14), ("bool_" + i, i *3 +15))
15 |     }
16 | 
17 |     nameIndexMap
18 |   }
19 | 
20 |   def $(name: String): Int = nameIndexMap.getOrElse(name, -1)
21 |  }
22 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/GenerateHFile.java:
--------------------------------------------------------------------------------
 1 | //package com.mobin.Example;
 2 | //
 3 | //import org.apache.hadoop.conf.Configuration;
 4 | //import org.apache.hadoop.fs.Path;
 5 | //import org.apache.hadoop.hbase.HBaseConfiguration;
 6 | //import org.apache.hadoop.hbase.TableName;
 7 | //import org.apache.hadoop.hbase.client.*;
 8 | //import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 9 | //import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
10 | //import org.apache.hadoop.io.LongWritable;
11 | //import org.apache.hadoop.io.Text;
12 | //import org.apache.hadoop.mapreduce.Job;
13 | //import org.apache.hadoop.mapreduce.Mapper;
14 | //import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
15 | //import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
16 | //
17 | //import java.io.IOException;
18 | //
19 | ///**
20 | // * Created by Mobin on 2016/12/22.
21 | // */
22 | //public class GenerateHFile {
23 | //
24 | //    static class HFileMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put>{
25 | //        @Override
26 | //        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
27 | //            String[] line = value.toString().split(",");
28 | //            String rk = line[0];
29 | //            ImmutableBytesWritable rowkey = new ImmutableBytesWritable(rk.getBytes());
30 | //            Put put = new Put(rk.getBytes());
31 | //            put.addColumn("S".getBytes(),"name".getBytes(), line[1].getBytes());
32 | //            put.addColumn("S".getBytes(), "sex".getBytes(), line[2].getBytes());
33 | //            put.addColumn("S".getBytes(), "age".getBytes(), line[3].getBytes());
34 | //            put.addColumn("S".getBytes(), "class".getBytes(), line[4].getBytes());
35 | //            context.write(rowkey, put);
36 | //        }
37 | //    }
38 | //
39 | //    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
40 | //        final String INPUT_PATH = "/DATA/PUBLIC/NOCE/SGC/Student.txt";
41 | //        final String OUT_PATH = "/DATA/PUBLIC/NOCE/SGC/HFILE";
42 | //        Configuration conf = HBaseConfiguration.create();
43 | //        HTable table = new HTable(conf,"STUDENT");
44 | //        Job job = Job.getInstance(conf);
45 | //        job.setJarByClass(GenerateHFile.class);
46 | //        job.setMapperClass(HFileMapper.class);
47 | //        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
48 | //        job.setMapOutputValueClass(Put.class);
49 | //
50 | //        job.setOutputFormatClass(HFileOutputFormat2.class);
51 | //        HFileOutputFormat2.configureIncrementalLoad(job,table,table.getRegionLocator());
52 | //        FileInputFormat.setInputPaths(job, INPUT_PATH);
53 | //        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
54 | //        System.exit(job.waitForCompletion(true)?0:1);
55 | //
56 | //    }
57 | //}
58 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/HiveDataBaseConnection.java:
--------------------------------------------------------------------------------
 1 | //package com.mobin.Example;
 2 | //
 3 | //import org.apache.hadoop.hive.ql.metadata.Hive;
 4 | //
 5 | //import java.sql.Connection;
 6 | //import java.sql.DriverManager;
 7 | //import java.sql.SQLException;
 8 | //
 9 | ///**
10 | // * Created by MOBIN on 2016/9/21.
11 | // */
12 | //public class HiveDataBaseConnection {
13 | //    private final static String DriverName = "org.apache.hive.jdbc.HiveDriver";
14 | //    private final static String URL = "jdbc:hive2://132.122.70.2:10000/default";
15 | //    private final static String UserName = "";
16 | //    private final static String Password = "";
17 | //    private Connection con;
18 | //
19 | //    public HiveDataBaseConnection(){
20 | //        try {
21 | //            Class.forName(DriverName);
22 | //            con = DriverManager.getConnection(URL,UserName, Password);
23 | //            System.out.println(con);
24 | //        } catch (ClassNotFoundException e) {
25 | //            e.printStackTrace();
26 | //        } catch (SQLException e) {
27 | //            e.printStackTrace();
28 | //        }
29 | //    }
30 | //
31 | //    public Connection getConnection(){
32 | //        return con;
33 | //    }
34 | //
35 | //    public void Close(){
36 | //            try {
37 | //                if(con != null)
38 | //                  con.close();
39 | //            } catch (SQLException e) {
40 | //                e.printStackTrace();
41 | //            }
42 | //    }
43 | //
44 | //    public static void main(String[] args) {
45 | //      HiveDataBaseConnection connection = new HiveDataBaseConnection();
46 | //    }
47 | //}
48 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/PutDataToHBase.java:
--------------------------------------------------------------------------------
 1 | //package com.mobin.Example;
 2 | //
 3 | //import org.apache.hadoop.conf.Configuration;
 4 | //import org.apache.hadoop.fs.Path;
 5 | //import org.apache.hadoop.hbase.HBaseConfiguration;
 6 | //import org.apache.hadoop.hbase.client.HTable;
 7 | //import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
 8 | //
 9 | //
10 | ///**
11 | // * Created by Mobin on 2016/12/22.
12 | // */
13 | //public class PutDataToHBase {
14 | //    public static void main(String[] args) throws Exception {
15 | //        Configuration conf = HBaseConfiguration.create();
16 | //        LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
17 | //        load.doBulkLoad(new Path("HFILE"), new HTable(conf,"STUDENT"));
18 | //    }
19 | //}
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/RandomPrefix_Shuffle.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Example
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | import scala.util.Random
 6 | 
 7 | /**
 8 |   * Created by Mobin on 2017/8/29.
 9 |   * 先局部聚合再全局聚合
10 |   */
11 | object RandomPrefix_Shuffle {
12 | 
13 |   def main(args: Array[String]) {
14 |     val conf = new SparkConf().setMaster("local[*]").setAppName("RandomPrefix")
15 |     val sc = new SparkContext(conf)
16 |     val line = sc.textFile("RandomPrefix.txt").map((_,1))
17 |     val randomPrefixRdd = line.map(x => {
18 |       val random = Random
19 |       val prefix = random.nextInt(10)
20 |       (prefix + "_" + x._1  , x._2)
21 |     })
22 | 
23 |     val localAggrRdd = randomPrefixRdd.reduceByKey(_ + _)
24 |     val removeRandPrefixRdd = localAggrRdd.map(x => {
25 |       val k = x._1.split("_")(1)
26 |       (k, x._2)
27 |     })
28 |     val globalAggrRdd = removeRandPrefixRdd.reduceByKey(_ + _)
29 |   }
30 | }
31 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/Sample_Shuffle.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Example
 2 | 
 3 | import java.util
 4 | 
 5 | import org.apache.spark.{SparkContext, SparkConf}
 6 | 
 7 | import scala.util.Random
 8 | 
 9 | /**
10 |   * Created by Mobin on 2017/8/30.
11 |   *方案实现思路：(Spark性能优化指南——高级篇[美团点评技术博客])
12 | 1.对包含少数几个数据量过大的key的那个RDD，通过sample算子采样出一份样本来，然后统计一下每个key的数量，计算出来数据量最大的是哪几个key。
13 | 
14 | 2.然后将这几个key对应的数据从原来的RDD中拆分出来，形成一个单独的RDD，并给每个key都打上n以内的随机数作为前缀，而不会导致倾斜的大部分key形成另外一个RDD。
15 | 
16 | 3.接着将需要join的另一个RDD，也过滤出来那几个倾斜key对应的数据并形成一个单独的RDD，将每条数据膨胀成n条数据，这n条数据都按顺序附加一个0~n的前缀，不会导致倾斜的大部分key也形成另外一个RDD。
17 | 
18 | 4.再将附加了随机前缀的独立RDD与另一个膨胀n倍的独立RDD进行join，此时就可以将原先相同的key打散成n份，分散到多个task中去进行join了。
19 | 
20 | 5.而另外两个普通的RDD就照常join即可。
21 | 
22 | 6.最后将两次join的结果使用union算子合并起来即可，就是最终的join结果。
23 |   */
24 | object Sample_Shuffle {
25 | 
26 |   def main(args: Array[String]) {
27 |     val conf = new SparkConf().setMaster("local[*]").setAppName("sample")
28 |     val sc = new SparkContext(conf)
29 | 
30 |     val rdd1 = sc.textFile("SampleJoin1.txt").map(x => {
31 |       val kv = x.split(",")
32 |       (kv(0), kv(1))
33 |     })
34 | 
35 |     val sampleRdd = rdd1.sample(false, 0.1)                                                           //对rdd1进行采样
36 |     val countSampleRdd = sampleRdd.map(x =>(x._1, 1)).reduceByKey(_ + _)   //统计出各key的频数
37 |     val reversedSampleRdd = countSampleRdd.map(x => (x._2, x._1))
38 |     val skewedUserid = reversedSampleRdd.sortByKey(false).take(1)(0)._2        //对频数进行排序,得到频数最高的对应的key
39 |     val skewRdd = rdd1.filter(_._1.equals(skewedUserid))                                    //从RDD1中拆分出导致数据倾斜的key，形成独立的RDD
40 |     val commonRdd = rdd1.filter(!_._1.equals(skewedUserid))                            //从RDD1中拆分出不会导致数据倾斜的key，形成独立的RDD
41 | 
42 |     val rdd2 = sc.textFile("SampleJoin2.txt").map(x => {
43 |       val kv = x.split(",")
44 |       (kv(0), kv(1))
45 |     })
46 | 
47 |     println("skew: " + skewedUserid)
48 |     //对RDD2中skew key扩充100倍
49 |     val skewRdd2 = rdd2.filter(_._1.equals(skewedUserid)).flatMap(x => {
50 |       for(i <- 1 to 10)yield((i + "_" + x._1, x._2))
51 |     })
52 | 
53 |     //为skewRdd中的每条数据都打上随机前缀并join上dkewRdd2
54 |     val joinRdd = skewRdd.map(x=>{
55 |       val prefix = Random.nextInt(10)
56 |       (prefix + "_" + x._1, x._2)
57 |     }).join(skewRdd2).map(x => {
58 |       val key = x._1.split("_")(1)
59 |       (key,x._2)
60 |     })
61 | 
62 |     val joinRdd2 = commonRdd.join(rdd2)
63 |     val resultRdd = joinRdd.union(joinRdd2)
64 |     resultRdd.foreach(println)
65 |   }
66 | }
67 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/ScoresDataGenerator.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Example
 2 | 
 3 | import java.io.FileWriter
 4 | 
 5 | import scala.util.Random
 6 | 
 7 | /**
 8 |   * Created by Mobin on 2016/12/22.
 9 |   * sno string, //学号
10 |   * semester int, //学期
11 |   * math int,   //  数学成绩
12 |   * en int,  //     英语成绩
13 |   * c int,   //     C语言成绩
14 |   * os int   //   操作系统成绩
15 | 
16 |   */
17 | object ScoresDataGenerator {
18 |   private val FILE_OUTPATH = "Scores.txt"
19 |   private val MAX_RECORD = 1000;
20 | 
21 |   def main(args: Array[String]) {
22 |     Generator(FILE_OUTPATH,MAX_RECORD)
23 |   }
24 | 
25 | 
26 |   private def Generator(filePath: String, recordNum: Int) {
27 |     var write: FileWriter = null
28 |     try {
29 |       write = new FileWriter(filePath, true)
30 |       val rand = new Random()
31 |       val term = 1
32 |       for(i <- 1 to recordNum){
33 |         val MScore = generatorScore
34 |         val EScore = generatorScore
35 |         val CScore = generatorScore
36 |         val SScore = generatorScore
37 |         write.write(i + "," + term + "," + MScore + "," + EScore + "," + CScore + "," + SScore)
38 |         write.write(System.getProperty("line.separator"))
39 |         write.flush()
40 |       }
41 |     } catch {
42 |       case e => println("error")
43 |     }finally {
44 |       if (write != null)
45 |          write.close()
46 |     }
47 |   }
48 | 
49 |   private def generatorScore: Int = {
50 |     val rand = new Random()
51 |     val sc = rand.nextInt(100)
52 |     val score = sc match {
53 |       case s if(s >0 &&  s <10) =>  s + 80
54 |       case s if(s >10 &&  s < 30) =>  s + 70
55 |       case s if(s >30 &&  s < 50) =>  s + 40
56 |       case s if(s >50 &&  s < 60) =>  s + 20
57 |       case _ => sc
58 |     }
59 |     score
60 |   }
61 | 
62 | }
63 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/SecondSortBykey.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Example
 2 | 
 3 | /**
 4 |   * Created by Mobin on 2017/9/3.
 5 |   */
 6 | class SecondSortBykey(val first: Int, val second: Int) extends Ordered [SecondSortBykey] with Serializable {
 7 |   def compare(other:SecondSortBykey):Int = {
 8 |     if (this.first - other.first !=0) {
 9 |       this.first - other.first
10 |     } else {
11 |       this.second - other.second
12 |     }
13 |   }
14 | }
15 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/SexCount.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Example;
 2 | 
 3 | import org.apache.hadoop.conf.Configurable;
 4 | import org.apache.hadoop.conf.Configuration;
 5 | import org.apache.hadoop.fs.Path;
 6 | import org.apache.hadoop.io.IntWritable;
 7 | import org.apache.hadoop.io.LongWritable;
 8 | import org.apache.hadoop.io.Text;
 9 | import org.apache.hadoop.mapreduce.Job;
10 | import org.apache.hadoop.mapreduce.Mapper;
11 | import org.apache.hadoop.mapreduce.Reducer;
12 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
14 | 
15 | import java.io.IOException;
16 | 
17 | /**
18 |  * Created by Mobin on 2016/12/22.
19 |  * 性别统计
20 |  */
21 | public class SexCount {
22 |     static class SexMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
23 |         @Override
24 |         protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
25 |             String[] line = value.toString().split(",");
26 |             context.write(new Text(line[2]), new IntWritable(1));
27 |         }
28 |     }
29 | 
30 |     static class SexReduce extends Reducer<Text, IntWritable, Text, IntWritable>{
31 |         @Override
32 |         protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
33 |             int count = 0;
34 |             for(IntWritable c : values)
35 |                 count += c.get();
36 |             context.write(key, new IntWritable(count));
37 |         }
38 |     }
39 | 
40 |     public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
41 |         final  String INPUT_PATH = "Student.txt";
42 |         final  String OUT_PATH = "StudentSex";
43 |         Configuration conf = new Configuration();
44 |         Job job = Job.getInstance(conf);
45 | 
46 |         job.setMapperClass(SexMapper.class);
47 |         job.setReducerClass(SexReduce.class);
48 |         job.setJarByClass(SexCount.class);
49 | 
50 | 
51 |         job.setOutputKeyClass(Text.class);
52 |         job.setOutputValueClass(IntWritable.class);
53 | 
54 |         FileInputFormat.setInputPaths(job, INPUT_PATH);
55 |         FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
56 |         System.exit(job.waitForCompletion(true)?0:1);
57 | 
58 |     }
59 | }
60 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/SparkJoin.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Example
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | 
 6 | /**
 7 |   * Created by Mobin on 2016/12/22.
 8 |   */
 9 | object SparkJoin {
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setAppName("SparkJoin").setMaster("local")
12 |     val sc = new SparkContext(conf)
13 |     val student = sc.textFile("Student.t")
14 |     val scores = sc.textFile("Scores.txt")
15 |     val studentT = student.map(str => str.split(",")).map(x => (x(0), x(1) +"," + x(2) + "," +x(3) + "," + x(4)))
16 |     val scoresT = scores.map(str => str.split(",")).map(x => (x(0), x(1) +"," + x(2) + "," +x(3) + "," + x(4) + "," + x(5)))
17 |     studentT.join(scoresT).foreach(println)
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Example/StudentDataGenerator.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.Example
 2 | 
 3 | import java.io.FileWriter
 4 | 
 5 | import scala.util.Random
 6 | 
 7 | /**
 8 |   * Created by Mobin on 2016/12/22.
 9 |   * sno string, //学号
10 |   * name string,//姓名
11 |   * sex string, //性别
12 |   * age int,    //年龄
13 |   * class string //班级
14 |   */
15 | object StudentDataGenerator {
16 |   private val FILE_OUTPATH = "Student.txt"
17 |   private val MAX_RECORD = 10000;
18 | 
19 |   def main(args: Array[String]) {
20 |     Generator(FILE_OUTPATH, MAX_RECORD)
21 |   }
22 | 
23 |   private def Generator(filePath: String, recordNum: Int) {
24 | 
25 |     var write: FileWriter = null
26 |     try {
27 |       write = new FileWriter(filePath, true)
28 |       val rand = new Random();
29 |       for (i <- 1 to recordNum) {
30 |         val name = nameGenerator
31 |         val sex = sexGenerator
32 |         //年龄在20~22之间
33 |         val age = rand.nextInt(3) + 20
34 |         //班级
35 |         val classNum = rand.nextInt(6)
36 |         write.write(i + "," + name + "," + sex + "," + age + "," + classNum)
37 |         write.write(System.getProperty("line.separator"))
38 |         write.flush()
39 |       }
40 |       } catch {
41 |         case e => println("error")
42 |       } finally {
43 |         if (write != null)
44 |           write.close()
45 |       }
46 |   }
47 | 
48 |   //生成姓名
49 |   private def nameGenerator: String = {
50 |     val higthPos = (176 + Math.abs(new Random().nextInt(39)))
51 |     val lowPos = (176 + Math.abs(new Random().nextInt(93)))
52 |     val name = Array[Byte](new Integer(higthPos).byteValue(), new Integer(lowPos).byteValue())
53 |     val surname = Array("钟", "李", "张", "刘", "王", "章", "洪", "江", "戴")
54 |     surname(new Random().nextInt(9)) + new String(name, "GBK")
55 |   }
56 | 
57 |   //生成性别
58 |   private def sexGenerator: String = {
59 |     val random = new Random()
60 |     val randomNum = random.nextInt(2) + 1
61 |     randomNum % 2 match {
62 |       case 0 => "男"
63 |       case _ => "女"
64 |     }
65 |   }
66 | }
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/HDFS/FSUtils/CountFileLine.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.HDFS.FSUtils;
 2 | 
 3 | import org.apache.hadoop.conf.Configuration;
 4 | import org.apache.hadoop.fs.FileSystem;
 5 | 
 6 | import java.io.File;
 7 | import java.io.IOException;
 8 | import java.util.ArrayList;
 9 | import java.util.concurrent.Callable;
10 | import java.util.concurrent.ExecutionException;
11 | import java.util.concurrent.Future;
12 | import java.util.concurrent.FutureTask;
13 | 
14 | /**
15 |  * Created by Mobin on 2016/12/20.
16 |  * 统计一个目录下的lzo文件的行数，每个lzo起一个task
17 |  */
18 | public class CountFileLine implements Callable<Integer>{
19 |     public FileSystem fs;
20 |     public String path;
21 | 
22 |     @Override
23 |     public Integer call() throws Exception {
24 |         return  countLine(fs,path);
25 |     }
26 | 
27 |     public Integer countLine(FileSystem fs,String path) throws IOException {
28 |         int count = 0;
29 |         FSUtils.BufferedReadIterable brl = new FSUtils.BufferedReadIterable(fs,path);
30 |         for(String line: brl){
31 |             count ++;
32 |         }
33 |         System.out.println(count);
34 |         return count;
35 |     }
36 | 
37 |     public static void main(String[] args) throws IOException, ExecutionException, InterruptedException {
38 |         int sum=0;
39 |         String file = "E:\\DATA\\PUBLIC\\NOCE\\AGG\\AGG_EVT_LTE_DPI_NEW\\hour=2016102011";
40 |         Configuration conf = new Configuration();
41 |         FileSystem fs = FileSystem.get(conf);
42 |         ArrayList<Future<Integer>> tasks = new ArrayList<>();
43 |         File[] files = new File(file).listFiles();
44 |         for(File f: files){
45 |             if(f.getName().endsWith(".lzo")){
46 |                 CountFileLine cd = new CountFileLine();
47 |                 cd.fs = fs;
48 |                 cd.path = f.getPath();
49 |                 FutureTask<Integer> task = new FutureTask<Integer>(cd);
50 |                 tasks.add(task);
51 |                 Thread thread = new Thread(task);
52 |                 System.out.println(thread.getName());
53 |                 thread.start();
54 |             }
55 |         }
56 | 
57 |         for(Future<Integer> future: tasks){
58 |             sum += future.get();
59 |         }
60 |         System.out.println(sum);
61 | 
62 |     }
63 | }
64 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/HDFS/FSUtils/FSUtils.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.HDFS.FSUtils;
 2 | 
 3 | import org.apache.hadoop.conf.Configuration;
 4 | import org.apache.hadoop.fs.FSDataInputStream;
 5 | import org.apache.hadoop.fs.FileSystem;
 6 | import org.apache.hadoop.fs.Path;
 7 | import org.apache.hadoop.io.compress.CompressionCodec;
 8 | import org.apache.hadoop.io.compress.CompressionCodecFactory;
 9 | import org.apache.hadoop.io.compress.CompressionInputStream;
10 | 
11 | import java.io.BufferedReader;
12 | import java.io.Closeable;
13 | import java.io.IOException;
14 | import java.io.InputStreamReader;
15 | import java.util.Iterator;
16 | 
17 | /**
18 |  * Created by Mobin on 2016/12/14.
19 |  * 统计一个目录下的lzo文件的行数，每个lzo起一个task
20 |  */
21 | public class FSUtils {
22 |     private static final Configuration conf = new Configuration();
23 |     private static final FileSystem fs = null;
24 | 
25 |     public static void main(String[] args) throws IOException {
26 |         String file = "E:\\DATA\\PUBLIC\\NOCE\\AGG\\AGG_EVT_LTE_DPI_NEW\\hour=2016102011\\m_p_0.txt.lzo";
27 |         int lineCount = 0;
28 |          Configuration conf = new Configuration();
29 |          FileSystem fs = FileSystem.get(conf);
30 |          try(BufferedReadIterable br = new BufferedReadIterable(fs,file)){
31 |              for(String line : br){
32 | 
33 |              }
34 |          }
35 | 
36 |     }
37 | 
38 |     public static BufferedReadIterable createBuferedReadIterable(FileSystem fs, String file) throws IOException {
39 |         return  new BufferedReadIterable(fs,file);
40 |     }
41 | 
42 |     public static class BufferedReadIterable implements Iterable<String>,Closeable{
43 |         private final String file;
44 |         private final long size;
45 |         private BufferedReader br;
46 | 
47 | 
48 |         public BufferedReadIterable(FileSystem fs, String file) throws IOException {
49 |             this.file = file;
50 |             Path path = new Path(file);
51 |             this.size = fs.getFileStatus(path).getLen();
52 | 
53 |             CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
54 |             //HDFS根据文件的后缀来确定使用的是哪种压缩算法
55 |             CompressionCodec codec = factory.getCodec(path);
56 | 
57 |             FSDataInputStream inputStream = fs.open(path,8192);
58 |             if(codec == null){
59 |                 br = new BufferedReader(new InputStreamReader(inputStream));
60 |             }else{
61 |                 //先解压再读取
62 |                 CompressionInputStream comIn  = codec.createInputStream(inputStream);
63 |                 br = new BufferedReader(new InputStreamReader(comIn));
64 |             }
65 |         }
66 | 
67 |         @Override
68 |         public void close() throws IOException {
69 |             br.close();
70 |         }
71 | 
72 |         @Override
73 |         public Iterator<String> iterator() {
74 |             return new Iterator<String>() {
75 |                 private String line;
76 |                 @Override
77 |                 public boolean hasNext() {
78 |                     try {
79 |                         line = br.readLine();
80 |                     } catch (IOException e) {
81 |                        line = null;
82 |                     }
83 |                     return line != null;
84 |                 }
85 | 
86 |                 @Override
87 |                 public String next() {
88 |                     return line;
89 |                 }
90 | 
91 |                 @Override
92 |                 public void remove() {
93 |                       throw new UnsupportedOperationException("remove");
94 |                 }
95 |             };
96 |         }
97 |     }
98 | }
99 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/HDFS/HDFSCompressionCodec.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.HDFS;
 2 | 
 3 | import org.apache.hadoop.conf.Configuration;
 4 | import org.apache.hadoop.fs.FSDataInputStream;
 5 | import org.apache.hadoop.fs.FSDataOutputStream;
 6 | import org.apache.hadoop.fs.FileSystem;
 7 | import org.apache.hadoop.fs.Path;
 8 | import org.apache.hadoop.io.IOUtils;
 9 | import org.apache.hadoop.io.compress.CompressionCodec;
10 | import org.apache.hadoop.io.compress.CompressionCodecFactory;
11 | import org.apache.hadoop.io.compress.CompressionInputStream;
12 | import org.apache.hadoop.io.compress.CompressionOutputStream;
13 | import org.apache.hadoop.util.ReflectionUtils;
14 | 
15 | import java.io.*;
16 | 
17 | /**
18 |  * Created by Mobin on 2016/12/19.
19 |  */
20 | public class HDFSCompressionCodec {
21 |     private static  final Configuration conf = new Configuration();
22 |     private static FileSystem fs = null;
23 |     //压缩
24 |     public void coder(String path) throws IOException, ClassNotFoundException {
25 |         //获取文件输入流
26 |         File dir = new File(path);
27 |         System.out.println(dir.isDirectory());
28 |         conf.set("mapred.output.compress", "true");
29 |         conf.set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec");
30 |         fs = FileSystem.get(conf);
31 |         FSDataOutputStream out = fs.create(new Path("E:\\DATA\\PUBLIC\\NOCE\\school5.lzo"));
32 |         Class<?> codecClass = Class.forName("com.hadoop.compression.lzo.LzopCodec");
33 |         CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
34 |         //将压缩数据写入到school.gz中
35 |         //创建CompressionInputStream来对文件进行压缩
36 |         CompressionOutputStream codecout = codec.createOutputStream(out);
37 |         for(File file:dir.listFiles() ) {
38 |             try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
39 |                 try{
40 |                     //最后个参数为true时同时关闭输出流和输入流
41 |                     IOUtils.copyBytes(in, codecout, 4096, false);
42 |                 }finally {
43 |                     IOUtils.closeStream(in);
44 |                 }
45 |             } catch (FileNotFoundException e) {
46 |                 e.printStackTrace();
47 |             } catch (IOException e) {
48 |                 e.printStackTrace();
49 |             }
50 |         }
51 |         out.flush();
52 |         out.close();
53 |     }
54 | 
55 |     //解压
56 |     public void decoder() throws IOException {
57 |         fs = FileSystem.get(conf);
58 |         CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
59 |         //根据文件的后缀名来确定使用的是哪种压缩算法
60 |         Path path = new Path("E:\\DATA\\PUBLIC\\NOCE\\school.gz");
61 |         CompressionCodec codec = factory.getCodec(path);
62 |         try(FSDataInputStream inputStream = fs.open(path,8096)){
63 |             //创建CompressionInputStream来对文件进行解压
64 |             CompressionInputStream comInputStream = codec.createInputStream(inputStream);
65 |             //将解压后的文件写到school.txt
66 |             FSDataOutputStream out = fs.create(new Path("E:\\DATA\\PUBLIC\\NOCE\\school5.txt"));
67 |             IOUtils.copyBytes(comInputStream,out,4096,false);
68 |             comInputStream.close();
69 |             out.close();
70 |         } catch (IOException e) {
71 |             e.printStackTrace();
72 |         }
73 |     }
74 | 
75 |     public static void main(String[] args) throws IOException, ClassNotFoundException {
76 |         String path = "E:\\DATA\\PUBLIC\\NOCE\\sch";
77 |         HDFSCompressionCodec codec = new HDFSCompressionCodec();
78 |         codec.coder(path);
79 |        codec.decoder();
80 |         Integer i = 0;
81 |         Integer o = 2;
82 |         i.equals(o);
83 |         Integer ii =i + o;
84 |     }
85 | }
86 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/HDFS/LzoCompress.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.HDFS;
 2 | 
 3 | import org.apache.hadoop.conf.Configuration;
 4 | import org.apache.hadoop.fs.FileSystem;
 5 | import org.apache.hadoop.fs.Path;
 6 | 
 7 | import java.io.IOException;
 8 | 
 9 | /**
10 |  * Created by Mobin on 2017/2/4.
11 |  */
12 | public class LzoCompress {
13 |     public static void main(String[] args)  {
14 |         Configuration conf = new Configuration();
15 |         try {
16 |          FileSystem fs = FileSystem.get(conf);
17 | 
18 | 
19 |         } catch (IOException e) {
20 |             e.printStackTrace();
21 |         }
22 |     }
23 | 
24 | 
25 | 
26 | //    public void LzoCoder(){
27 | //       try(){
28 | //
29 | //        }
30 | //    }
31 | }
32 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/HDFS/WriteToHDFS.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.HDFS
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | import scala.util.Random
 6 | 
 7 | /**
 8 |   * Created by hadoop on 3/6/16.
 9 |   * 为文件中的第一个字段拼接一个随机值再写入HDFS
10 |   */
11 | object WriteToHDFS {
12 |   def main(args: Array[String]) {
13 |     if (args.length < 2) {
14 |       System.err.println("Usage: WriteToHDFS <InputPath> <OutputPath>\n")
15 |       System.exit(1)
16 |     }
17 |     val conf = new SparkConf().setMaster("local").setAppName("WriteToHDFS")
18 |     val sc = new SparkContext(conf)
19 |     val sgfile = sc.textFile(args(0))
20 | 
21 |     val rdd = sgfile.map(lines => {
22 |       val line = lines.split("\\s")
23 |       if(line.length == 6){
24 |         val one = line(0) +"-"+ new Random().nextInt()
25 |         one+","+line(1)+","+line(2).getBytes+","+line(3)+","+line(4)+","+line(5)
26 |       }else   //如果这样写  一定不能只写if语句  还要加上else语句，否则没有通过if的，将被视了() 否则后期通过Phoenix导入到HBase中会因为字段不合法而报错
27 |         "mobin1"+","+"mobin2"+","+"mobin3"+"mobin4"+","+"mobin5"+","+"mobin6"
28 |     })
29 |     rdd.saveAsTextFile(args(1))
30 |     sc.stop()
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/KStream/KStreamDemo.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Kafka.KStream;
 2 | 
 3 | import org.apache.kafka.clients.consumer.ConsumerConfig;
 4 | import org.apache.kafka.common.serialization.Serdes;
 5 | import org.apache.kafka.streams.KafkaStreams;
 6 | import org.apache.kafka.streams.StreamsBuilder;
 7 | import org.apache.kafka.streams.StreamsConfig;
 8 | import org.apache.kafka.streams.kstream.ForeachAction;
 9 | import org.apache.kafka.streams.kstream.KStream;
10 | import org.apache.kafka.streams.kstream.KTable;
11 | import org.apache.kafka.streams.kstream.Printed;
12 | 
13 | import java.util.Properties;
14 | 
15 | /**
16 |  * Created with IDEA
17 |  * Creater: MOBIN
18 |  * Date: 2018/8/19
19 |  * Time: 3:41 PM
20 |  */
21 | public class KStreamDemo {
22 |     private static final String APPLICATION_ID_CONFIG = "KStream-test";
23 |     private static final String BROKER_LIST = "localhost:9092";
24 |     private static final String TOPIC = "streams-foo";
25 |     private static StreamsBuilder streamsBuilder;
26 |     private static KStream<String, String> textLine;
27 | 
28 |     public static Properties initProperties(){
29 |         Properties properties = new Properties();
30 |         properties.put(StreamsConfig.APPLICATION_ID_CONFIG, APPLICATION_ID_CONFIG);
31 |         properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, BROKER_LIST);
32 |         properties.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
33 |         properties.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
34 |         properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
35 |         return properties;
36 |     }
37 | 
38 |     public static void printf() throws InterruptedException {
39 |         Properties properties = initProperties();
40 |         streamsBuilder = new StreamsBuilder();
41 |         textLine = streamsBuilder.stream(TOPIC);
42 |         textLine.foreach(new ForeachAction<String, String>() {
43 |             @Override
44 |             public void apply(String key, String value) {
45 |                 System.out.println(key + ":" + value);
46 |             }
47 |         });
48 |         KafkaStreams streams = new KafkaStreams(streamsBuilder.build(), properties);
49 |         streams.start();
50 |         Thread.sleep(5000L);
51 |         streams.close();
52 |     }
53 | 
54 |     public static void main(String[] args) throws InterruptedException {
55 |         KStreamDemo.printf();
56 |     }
57 | 
58 | }
59 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/Partition/StockPartitionor.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Kafka.Partition;
 2 | 
 3 | import org.apache.kafka.clients.producer.Partitioner;
 4 | import org.apache.kafka.common.Cluster;
 5 | 
 6 | import java.util.Map;
 7 | 
 8 | /**
 9 |  * Created with IDEA
10 |  * Creater: MOBIN
11 |  * Date: 2018/8/16
12 |  * Time: 4:47 PM
13 |  * //写好自定义分区后在配置文件进行自定义分区配置
14 |  * properties.put("ProducerConfig.PARTITIONER_CLASS_CONFIG", StockPartitionor.class.getName)
15 |  */
16 | public class StockPartitionor implements Partitioner{
17 |     //分区数
18 |     private static final Integer PARTITIONS = 6;
19 |     @Override
20 |     public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
21 |         if (key == null){
22 |             return 0;
23 |         }
24 |         String stockcode = String.valueOf(key);
25 |         try {
26 |             int partitionID = Integer.valueOf(stockcode.substring(stockcode.length() - 2)) % PARTITIONS;
27 |             return partitionID;
28 |         }catch (NumberFormatException e){
29 |              return 0;
30 |         }
31 |     }
32 | 
33 |     @Override
34 |     public void close() {
35 | 
36 |     }
37 | 
38 |     @Override
39 |     public void configure(Map<String, ?> map) {
40 | 
41 |     }
42 | }
43 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/Producers/KafkaProducerThread.java:
--------------------------------------------------------------------------------
  1 | package com.mobin.Kafka.Producers;
  2 | 
  3 | import org.apache.kafka.clients.producer.*;
  4 | import org.apache.kafka.common.serialization.StringSerializer;
  5 | import org.slf4j.Logger;
  6 | import org.slf4j.LoggerFactory;
  7 | import sun.util.resources.ga.LocaleNames_ga;
  8 | 
  9 | import java.text.DecimalFormat;
 10 | import java.util.Properties;
 11 | import java.util.Random;
 12 | import java.util.concurrent.ExecutorService;
 13 | import java.util.concurrent.Executors;
 14 | 
 15 | /**
 16 |  * Created with IDEA
 17 |  * Creater: MOBIN
 18 |  * Date: 2018/8/13
 19 |  * Time: 3:08 PM
 20 |  */
 21 | public class KafkaProducerThread implements Runnable {
 22 |     private static final int MSG_SIZE = 100;
 23 |     private static final String TOPIC = "stock-quotation4";
 24 |     private static final String BROKER_LIST = "localhost:9092";
 25 |     private static final Logger log = LoggerFactory.getLogger(KafkaProducerThread.class);
 26 |     private static KafkaProducer<String, String> producer = null;
 27 |     private ProducerRecord<String, String> record = null;
 28 | 
 29 |     public KafkaProducerThread(KafkaProducer<String, String> producer, ProducerRecord<String, String> record) {
 30 |         this.producer = producer;
 31 |         this.record = record;
 32 |     }
 33 | 
 34 |     @Override
 35 |     public void run() {
 36 |         System.out.println(producer + record.toString());
 37 |         producer.send(record, new Callback() {
 38 | 
 39 |             @Override
 40 |             public void onCompletion(RecordMetadata recordMetadata, Exception e) {
 41 |                 System.out.println("00000");
 42 |                 producer.send(record, new Callback() {
 43 |                     @Override
 44 |                     public void onCompletion(RecordMetadata recordMetadata, Exception e) {
 45 |                         if (null != e) {  //发送消息异常
 46 |                             log.error("发送消息异常...");
 47 |                         }
 48 |                         if (null != recordMetadata) {
 49 |                             log.info(String.format("offset:%s, partition:%s", recordMetadata.offset(), recordMetadata.partition()));
 50 |                         }
 51 |                     }
 52 |                 });
 53 |             }
 54 |         });
 55 |     }
 56 | 
 57 |     private static StockQuotationInfo createQuotationInfo() {
 58 |         StockQuotationInfo quotationInfo = new StockQuotationInfo();
 59 |         Random random = new Random();
 60 |         Integer stockCode = 600100 + random.nextInt();
 61 |         float r = (float) Math.random();
 62 |         if (r / 2 < 0.5) {
 63 |             r = -r;
 64 |         }
 65 |         DecimalFormat decimalFormat = new DecimalFormat(".00");
 66 |         quotationInfo.setCurrentPrice(Float.valueOf(decimalFormat.format(11 + r)));
 67 |         quotationInfo.setPreClosePrice(11.80f);
 68 |         quotationInfo.setOpenPrice(11.5f);
 69 |         quotationInfo.setLowPrice(10.5f);
 70 |         quotationInfo.setHighPrice(12.5f);
 71 |         quotationInfo.setStockCode(stockCode.toString());
 72 |         quotationInfo.setTradeTime(System.currentTimeMillis());
 73 |         quotationInfo.setStockName("股票-" + stockCode);
 74 |         return quotationInfo;
 75 |     }
 76 | 
 77 |     public static Properties initConfig() {
 78 |         Properties properties = new Properties();
 79 |         properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BROKER_LIST);
 80 |         properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
 81 |         properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
 82 |         return properties;
 83 |     }
 84 | 
 85 |     public static void main(String[] args) {
 86 |         Properties configs = initConfig();
 87 |         KafkaProducer<String, String> producer = new KafkaProducer<String, String>(configs);
 88 |         ProducerRecord<String, String> record;
 89 |         StockQuotationInfo quotationInfo;
 90 |         ExecutorService executorService = Executors.newFixedThreadPool(10);
 91 |         long current = System.currentTimeMillis();
 92 |         try {
 93 |             for (int i = 0; i < MSG_SIZE; i++) {
 94 |                 quotationInfo = createQuotationInfo();
 95 |                 record = new ProducerRecord<String, String>(TOPIC, null, quotationInfo.getTradeTime(),
 96 |                         quotationInfo.getStockCode(), quotationInfo.toString());
 97 |                 executorService.submit(new KafkaProducerThread(producer, record));
 98 |             }
 99 |         } catch (Exception e) {
100 |             System.out.println("-------");
101 |         } finally {
102 |             producer.close();
103 |             executorService.shutdown();
104 |         }
105 |     }
106 | }
107 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/Producers/QuotationProducer.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Kafka.Producers;
 2 | 
 3 | 
 4 | import org.apache.kafka.clients.producer.*;
 5 | import org.apache.kafka.common.serialization.StringSerializer;
 6 | import org.slf4j.Logger;
 7 | import org.slf4j.LoggerFactory;
 8 | 
 9 | import java.text.DecimalFormat;
10 | import java.util.Properties;
11 | import java.util.Random;
12 | 
13 | /**
14 |  * Created with IDEA
15 |  * Creater: MOBIN
16 |  * Date: 2018/8/13
17 |  * Time: 11:24 AM
18 |  */
19 | public class QuotationProducer {
20 |     private static final Logger log = LoggerFactory.getLogger(QuotationProducer.class);
21 |     private static final int MSG_SIZE = 100;
22 |     private static final String TOPIC = "stock-quotation";
23 |     private static final String BROKER_LIST = "localhost:9092";
24 |     private static KafkaProducer<String ,String> producer = null;
25 |     static {
26 |         Properties configs = initConfig();
27 |         producer = new KafkaProducer<String, String>(configs);
28 |     }
29 | 
30 |     public static Properties initConfig(){
31 |         Properties properties = new Properties();
32 |         properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, BROKER_LIST);
33 |         properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
34 |         properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
35 |         return properties;
36 |     }
37 | 
38 |     private static StockQuotationInfo createQuotationInfo(){
39 |         StockQuotationInfo quotationInfo = new StockQuotationInfo();
40 |         Random random = new Random();
41 |         Integer stockCode = 600100 + random.nextInt();
42 |         float r = (float) Math.random();
43 |         if (r / 2 < 0.5){
44 |             r = -r;
45 |         }
46 |         DecimalFormat decimalFormat = new DecimalFormat(".00");
47 |         quotationInfo.setCurrentPrice(Float.valueOf(decimalFormat.format(11 + r)));
48 |         quotationInfo.setPreClosePrice(11.80f);
49 |         quotationInfo.setOpenPrice(11.5f);
50 |         quotationInfo.setLowPrice(10.5f);
51 |         quotationInfo.setHighPrice(12.5f);
52 |         quotationInfo.setStockCode(stockCode.toString());
53 |         quotationInfo.setTradeTime(System.currentTimeMillis());
54 |         quotationInfo.setStockName("股票-" + stockCode);
55 |         return quotationInfo;
56 |     }
57 | 
58 |     public static void main(String[] args) {
59 |         ProducerRecord<String, String> record = null;
60 |         StockQuotationInfo quotationInfo = null;
61 |         try {
62 |             int num = 0;
63 |             for (int i = 0; i < MSG_SIZE; i ++){
64 |                 quotationInfo = createQuotationInfo();
65 |                 record = new ProducerRecord<String, String>(TOPIC,null, quotationInfo.getTradeTime(),quotationInfo.getStockCode()
66 |                 ,quotationInfo.toString());
67 |                 producer.send(record);
68 |                 //异步方式，指定Callback，实现onCompleteion
69 | //                producer.send(record, new Callback() {
70 | //                    @Override
71 | //                    public void onCompletion(RecordMetadata recordMetadata, Exception e) {
72 | //                        if (null != e){  //发送消息异常
73 | //                            log.error("发送消息异常...");
74 | //                        }
75 | //                        if (null != recordMetadata){
76 | //                            log.info(String.format("offset:%s, partition:%s", recordMetadata.offset(), recordMetadata.partition()));
77 | //                        }
78 | //                    }
79 | //                });
80 |                 if (num++ % 10 == 0){
81 |                     Thread.sleep(2000L);
82 |                 }
83 |             }
84 |         }catch (InterruptedException e){
85 | 
86 |         }finally {
87 |             producer.close();
88 |         }
89 |     }
90 | }
91 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/Producers/StockQuotationInfo.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Kafka.Producers;
 2 | 
 3 | import java.io.Serializable;
 4 | 
 5 | /**
 6 |  * Created with IDEA
 7 |  * Creater: MOBIN
 8 |  * Date: 2018/8/13
 9 |  * Time: 11:19 AM
10 |  */
11 | public class StockQuotationInfo implements Serializable{
12 |     private static final long serialVersionUID = 1L;
13 |     private String stockCode;
14 |     private String stockName;
15 |     private long tradeTime;
16 |     private float preClosePrice;
17 |     private float openPrice;
18 |     private float currentPrice;
19 |     private float highPrice;
20 |     private float lowPrice;
21 | 
22 |     public static long getSerialVersionUID() {
23 |         return serialVersionUID;
24 |     }
25 | 
26 |     public String getStockCode() {
27 |         return stockCode;
28 |     }
29 | 
30 |     public void setStockCode(String stockCode) {
31 |         this.stockCode = stockCode;
32 |     }
33 | 
34 |     public String getStockName() {
35 |         return stockName;
36 |     }
37 | 
38 |     public void setStockName(String stockName) {
39 |         this.stockName = stockName;
40 |     }
41 | 
42 |     public long getTradeTime() {
43 |         return tradeTime;
44 |     }
45 | 
46 |     public void setTradeTime(long tradeTime) {
47 |         this.tradeTime = tradeTime;
48 |     }
49 | 
50 |     public float getPreClosePrice() {
51 |         return preClosePrice;
52 |     }
53 | 
54 |     public void setPreClosePrice(float preClosePrice) {
55 |         this.preClosePrice = preClosePrice;
56 |     }
57 | 
58 |     public float getOpenPrice() {
59 |         return openPrice;
60 |     }
61 | 
62 |     public void setOpenPrice(float openPrice) {
63 |         this.openPrice = openPrice;
64 |     }
65 | 
66 |     public float getCurrentPrice() {
67 |         return currentPrice;
68 |     }
69 | 
70 |     public void setCurrentPrice(float currentPrice) {
71 |         this.currentPrice = currentPrice;
72 |     }
73 | 
74 |     public float getHighPrice() {
75 |         return highPrice;
76 |     }
77 | 
78 |     public void setHighPrice(float highPrice) {
79 |         this.highPrice = highPrice;
80 |     }
81 | 
82 |     public float getLowPrice() {
83 |         return lowPrice;
84 |     }
85 | 
86 |     public void setLowPrice(float lowPrice) {
87 |         this.lowPrice = lowPrice;
88 |     }
89 | 
90 |     @Override
91 |     public String toString() {
92 |         return stockCode + "|" +stockName+ "|" +tradeTime+ "|" +preClosePrice+ "|" +openPrice
93 |                 + "|" +currentPrice+ "|" +highPrice+ "|" +lowPrice;
94 |     }
95 | }
96 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/Topic.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Kafka;
 2 | 
 3 | 
 4 | import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 5 | import org.apache.kafka.clients.admin.*;
 6 | import org.apache.kafka.common.KafkaFuture;
 7 | import org.apache.kafka.common.config.ConfigResource;
 8 | 
 9 | import java.util.*;
10 | import java.util.concurrent.ExecutionException;
11 | 
12 | /**
13 |  * Created with IDEA
14 |  * Creater: MOBIN
15 |  * Date: 2018/8/12
16 |  * Time: 5:18 PM
17 |  */
18 | public class Topic {
19 |     private static final String ZK_CONNECT = "localhost:2181";
20 |     //ZK连接session过期时间
21 |     private static final int SESSION_TIMEOUT = 30000;
22 |     //连接超时时间
23 |     private static final int CONNECT_TIMEOUT = 30000;
24 | 
25 |     public static void createTopic(AdminClient adminClient,String topic, int partition, short replica, Properties conf){
26 | 
27 |         Map<String, String> configs = new HashMap<>();
28 |         try {
29 |             CreateTopicsResult result = adminClient.createTopics(Arrays.asList(new NewTopic(topic, partition, replica).configs(configs)));
30 |         }catch (Exception e){
31 | 
32 |         }finally {
33 |             adminClient.close();
34 |         }
35 |     }
36 | 
37 |     public static void deleteTopic(AdminClient adminClient,String topic, Properties conf){
38 |         adminClient.create(conf);
39 |         KafkaFuture future = adminClient.deleteTopics(Arrays.asList(topic)).all();
40 |         try {
41 |             future.get();
42 |         } catch (InterruptedException e) {
43 |             e.printStackTrace();
44 |         } catch (ExecutionException e) {
45 |             e.printStackTrace();
46 |         }
47 |     }
48 | 
49 |     public static void updateTopicConfig(AdminClient adminClient, String topic) throws ExecutionException, InterruptedException {
50 |         Config config = new Config(Arrays.asList(new ConfigEntry("max.message.bytes","404800")));
51 |         adminClient.alterConfigs(Collections.singletonMap(new ConfigResource(ConfigResource.Type.TOPIC, topic), config)).all().get();
52 |     }
53 | 
54 |     public static void showTopic(AdminClient adminClient, String topic) throws ExecutionException, InterruptedException {
55 |         DescribeTopicsResult topicsResult = adminClient.describeTopics(Arrays.asList(topic));
56 |         Map<String, TopicDescription> map = topicsResult.all().get();
57 |         for (Map.Entry<String, TopicDescription> entry: map.entrySet()){
58 |             System.out.println(entry.getKey() + " : " + entry.getValue());
59 |         }
60 | 
61 |     }
62 | 
63 |     //查询所有Topics
64 |     public static void showAllTopic(AdminClient adminClient) throws ExecutionException, InterruptedException {
65 |         ListTopicsOptions options = new ListTopicsOptions();
66 |         options.listInternal(true);
67 |         ListTopicsResult result = adminClient.listTopics(options);
68 |         Set<String> topicName = result.names().get();
69 |         System.out.println(topicName);
70 |     }
71 | 
72 |     public static void main(String[] args) throws ExecutionException, InterruptedException {
73 |         String TOPIC = "APITopic";
74 |         Properties conf = new Properties();
75 |         conf.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
76 |         AdminClient adminClient = AdminClient.create(conf);
77 | //        Topic.createTopic(adminClient,"APITopic",1, (short) 1, conf);
78 | //        Topic.deleteTopic(adminClient, TOPIC ,conf);
79 | //        Topic.updateTopicConfig(adminClient, TOPIC);
80 | //        Topic.showTopic(adminClient, TOPIC);
81 |         Topic.showAllTopic(adminClient);
82 |     }
83 | }
84 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/consumers/KafkaConsumerThread.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Kafka.consumers;
 2 | 
 3 | import org.apache.kafka.clients.consumer.ConsumerRecord;
 4 | import org.apache.kafka.clients.consumer.ConsumerRecords;
 5 | import org.apache.kafka.clients.consumer.KafkaConsumer;
 6 | 
 7 | import java.util.Arrays;
 8 | import java.util.Properties;
 9 | 
10 | /**
11 |  * Created with IDEA
12 |  * Creater: MOBIN
13 |  * Date: 2018/8/16
14 |  * Time: 3:50 PM
15 |  * 6个消费者线程消费同一个主题
16 |  */
17 | public class KafkaConsumerThread extends Thread {
18 |     //每个线程拥有私有的KafkaConsumer实例
19 |     private KafkaConsumer<String, String> consumer;
20 | 
21 |     public KafkaConsumerThread(Properties consumerConfig, String topic) {
22 |         this.consumer = new KafkaConsumer<String, String>(consumerConfig);
23 |         consumer.subscribe(Arrays.asList(topic));
24 |     }
25 | 
26 |     @Override
27 |     public void run() {
28 |         try {
29 |             while (true) {
30 |                 ConsumerRecords<String, String> records = consumer.poll(1000);
31 |                 for (ConsumerRecord<String, String> record : records) {
32 |                     System.out.printf("partition = %d, offset = %d, key = %s value = %s%n",
33 |                             record.partition(), record.offset(), record.key(), record.value());
34 |                 }
35 |             }
36 |         } catch (Exception e) {
37 |             e.printStackTrace();
38 |         } finally {
39 |             consumer.close();
40 |         }
41 |     }
42 | 
43 |     public static void main(String[] args) {
44 |         Properties properties = new Properties();
45 |         properties.put("bootstrap.servers", "localhost:9092");
46 |         properties.put("group.id", "test");
47 |         properties.put("enable.auto.commit", true);
48 |         properties.put("auto.commit.interval.ms", 1000);//设置偏移量提交时间
49 |         properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
50 |         properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
51 |         for (int i = 0; i < 6; i ++){
52 |             new KafkaConsumerThread(properties, "stock-quotation").start();
53 |         }
54 |     }
55 | }
56 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/consumers/QuotationConsumer.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Kafka.consumers;
 2 | 
 3 | import org.apache.kafka.clients.consumer.ConsumerRecord;
 4 | import org.apache.kafka.clients.consumer.ConsumerRecords;
 5 | import org.apache.kafka.clients.consumer.KafkaConsumer;
 6 | 
 7 | import java.util.Arrays;
 8 | import java.util.Properties;
 9 | 
10 | /**
11 |  * Created with IDEA
12 |  * Creater: MOBIN
13 |  * Date: 2018/8/14
14 |  * Time: 3:40 PM
15 |  */
16 | public class QuotationConsumer {
17 |     private static final String BROKERS_LIST = "localhost:9092";
18 |     private static final String GROUP_ID = "test";
19 |     private static final String CLIENT_ID = "test";
20 |     private static final String TOPIC = "stock-quotation";
21 |     private static KafkaConsumer<String, String> consumer;
22 | 
23 |     static {
24 |         Properties properties = initPorerties();
25 |         consumer = new KafkaConsumer<String, String>(properties);
26 |     }
27 | 
28 |     public static Properties initPorerties(){
29 |         Properties properties = new Properties();
30 |         properties.put("bootstrap.servers", BROKERS_LIST);
31 |         properties.put("group.id", GROUP_ID);
32 |         properties.put("client.id", CLIENT_ID);
33 |         properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
34 |         properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
35 |         return properties;
36 |     }
37 | 
38 |     public static void poll(){
39 |         consumer.subscribe(Arrays.asList(TOPIC));
40 |         try {
41 |           while (true){
42 |               ConsumerRecords<String, String> records = consumer.poll(1000);
43 |               for (ConsumerRecord<String, String> record: records){
44 |                   System.out.printf("partition = %d, offset = %d, key = %s value = %s%n",
45 |                           record.partition(), record.offset(), record.key(), record.value());
46 |               }
47 |           }
48 |         }catch (Exception e){
49 | 
50 |         }finally {
51 |             consumer.close();
52 |         }
53 |     }
54 | 
55 |     public static void main(String[] args) {
56 |         QuotationConsumer.poll();
57 |     }
58 | }
59 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Kafka/consumers/QuotationConsumerManualCommit.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.Kafka.consumers;
 2 | 
 3 | import org.apache.kafka.clients.consumer.*;
 4 | import org.apache.kafka.common.TopicPartition;
 5 | import org.codehaus.janino.IClass;
 6 | 
 7 | import java.util.Arrays;
 8 | import java.util.Collection;
 9 | import java.util.Map;
10 | import java.util.Properties;
11 | 
12 | /**
13 |  * Created with IDEA
14 |  * Creater: MOBIN
15 |  * Date: 2018/8/14
16 |  * Time: 3:40 PM
17 |  * 每处理完10消息提交一次
18 |  */
19 | public class QuotationConsumerManualCommit {
20 |     private static final String BROKERS_LIST = "localhost:9092";
21 |     private static final String GROUP_ID = "test";
22 |     private static final String CLIENT_ID = "test";
23 |     private static final String TOPIC = "stock-quotation";
24 |     private static KafkaConsumer<String, String> consumer;
25 | 
26 |     static {
27 |         Properties properties = initPorerties();
28 |         consumer = new KafkaConsumer<String, String>(properties);
29 |     }
30 | 
31 |     public static Properties initPorerties(){
32 |         Properties properties = new Properties();
33 |         properties.put("bootstrap.servers", BROKERS_LIST);
34 |         properties.put("group.id", GROUP_ID);
35 |         properties.put("client.id", CLIENT_ID);
36 |         properties.put("fetch.max.bytes", 1024);  //设置一次fetch请求取得的数据最大值为1kb，默认为5MB，这里是为了方便测试
37 |         properties.put("enable.auto.commit", false);  //手动提交偏移量
38 |         properties.put("client.id", CLIENT_ID);
39 |         properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
40 |         properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
41 |         return properties;
42 |     }
43 | 
44 |     public static void poll(){
45 |         consumer.subscribe(Arrays.asList(TOPIC,"stock-quotation1"), new ConsumerRebalanceListener() {
46 |             @Override
47 |             public void onPartitionsRevoked(Collection<TopicPartition> collection) {
48 |             }
49 | 
50 |             @Override
51 |             public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
52 |                long committedOffset = -1;
53 |                for (TopicPartition topicPartition: partitions){
54 | //                   System.out.println(consumer.committed(topicPartition));
55 | //                   committedOffset = consumer.committed(topicPartition).offset();
56 | //                   System.out.println("当前"+topicPartition+"偏移量："+committedOffset);
57 |                    consumer.seekToBeginning(partitions);
58 |                }
59 |             }
60 |         });
61 |         try {
62 |             int minCommitSize = 10;//最少处理10条消息后才进行提交
63 |             int count = 0; //消息计算器
64 |           while (true){
65 |              ConsumerRecords<String, String> records = consumer.poll(1000);
66 |              for (ConsumerRecord<String, String> record: records){
67 |                  System.out.printf("topic = %s, partition = %d, offset = %d, key = %s value = %s%n",
68 |                          record.topic(),record.partition(), record.offset(), record.key(), record.value());
69 |                  count ++;
70 |              }
71 |              if (count >= minCommitSize) {
72 |                  consumer.commitAsync(new OffsetCommitCallback() {
73 |                      @Override
74 |                      public void onComplete(Map<TopicPartition, OffsetAndMetadata> map, Exception e) {
75 |                          if (null == e){
76 |                              System.out.println("提交成功");
77 |                          }else {
78 |                              System.out.println("提交发生了异常");
79 |                          }
80 |                      }
81 |                  });
82 |                  count = 0;
83 |              }
84 |           }
85 |         }catch (Exception e){
86 |             e.printStackTrace();
87 |         }finally {
88 |             consumer.close();
89 |         }
90 |     }
91 | 
92 |     public static void main(String[] args) {
93 |         QuotationConsumerManualCommit.poll();
94 |     }
95 | }
96 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/Action/Aggregate.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.Action
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/20/16.
 7 |   * seqOp函数将每个分区的数据聚合成类型为U的值，comOp函数将各分区的U类型数据聚合起来得到类型为U的值
 8 |   */
 9 | object Aggregate {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("Fold")
13 |     val sc = new SparkContext(conf)
14 |     val rdd = sc.parallelize(List(1,2,3,4),2)
15 |     val aggregateRDD = rdd.aggregate(2)(_+_,_ * _)
16 |     println(aggregateRDD)
17 |     sc.stop
18 |   }
19 | 
20 |   /**
21 |     * 步骤1：分区1：zeroValue+1+2=5   分区2：zeroValue+3+4=9
22 | 
23 |      步骤2：2*分区1的结果*分区2的结果=90
24 |     */
25 | 
26 | }
27 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/Action/Fold.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.Action
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/20/16.
 7 |   * 通过op函数聚合各分区中的元素及合并各分区的元素，op函数需要两个参数，在开始时第一个传入的参数为zeroValue,T为RDD数据集的数据类型
 8 |   */
 9 | object Fold {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("Fold")
13 |     val sc = new SparkContext(conf)
14 |     val rdd = sc.parallelize(Array(("a", 1), ("b", 2), ("a", 2), ("c", 5), ("a", 3)), 2)
15 |     val foldRDD = rdd.fold(("d", 0))((val1, val2) => {
16 |       if (val1._2 >= val2._2) val1 else val2
17 |     })
18 |     println(foldRDD)
19 |     sc.stop
20 |   }
21 | 
22 |   /**
23 |     * 1.开始时将(“d”,0)作为op函数的第一个参数传入，将Array中和第一个元素("a",1)作为op函数的第二个参数传入，并比较value的值，
24 |     * 返回value值较大的元素
25 | 
26 |     * 2.将上一步返回的元素又作为op函数的第一个参数传入，Array的下一个元素作为op函数的第二个参数传入，比较大小
27 | 
28 |     * 3.重复第2步骤
29 |     */
30 | }
31 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/Action/Func.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.Action
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/19/16.
 7 |   * reduce(func):通过函数func先聚集各分区的数据集，再聚集分区之间的数据，func接收两个参数，返回一个新值，
 8 |   * 新值再做为参数继续传递给函数func，直到最后一个元素
 9 |   */
10 | object Func {
11 | 
12 |   def main(args: Array[String]) {
13 |     val conf = new SparkConf().setMaster("local").setAppName("reduce")
14 |     val sc = new SparkContext(conf)
15 |     val rdd = sc.parallelize(1 to 10,2)
16 |     val reduceRDD = rdd.reduce(_ + _)
17 |     val reduceRDD1 = rdd.reduce(_ - _) //如果分区数据为1结果为 -53
18 |     val countRDD = rdd.count()
19 |     val firstRDD = rdd.first()
20 |     val takeRDD = rdd.take(5)
21 |     val topRDD = rdd.top(3)
22 |     val takeOrderedRDD = rdd.takeOrdered(3)
23 |     println("func +: "+reduceRDD)
24 |     println("func -: "+reduceRDD1)
25 |     println("count: "+countRDD)
26 |     println("first: "+firstRDD)
27 |     println("take:")
28 |     takeRDD.foreach(x => print(x +" "))
29 |     println("\ntop:")
30 |     topRDD.foreach(x => print(x +" "))
31 |     println("\ntakeOrdered:")
32 |     takeOrderedRDD.foreach(x => print(x +" "))
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/Action/KVFunc.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.Action
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | import scala.concurrent.Future
 6 | 
 7 | /**
 8 |   * Created by hadoop on 4/19/16.
 9 |   */
10 | object KVFunc {
11 | 
12 |   def main(args: Array[String]) {
13 |     val conf = new SparkConf().setMaster("local").setAppName("KVFunc")
14 |     val sc = new SparkContext(conf)
15 |     val arr = List(("A", 1), ("B", 2), ("A", 2), ("B", 3))
16 |     val rdd = sc.parallelize(arr,2)
17 |     val countByKeyRDD = rdd.countByKey()
18 |     val collectAsMapRDD = rdd.collectAsMap()
19 |     val lookupRDD = rdd.lookup("A")
20 |     println("countByKey:")
21 |     countByKeyRDD.foreach(print)
22 |     println("\ncollectAsMap:")
23 |     collectAsMapRDD.foreach(print)
24 |     println("\nlookup:")
25 |     lookupRDD.foreach(x => print(x))
26 |     sc.stop
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/Cartesian.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/8/16.
 7 |   * 对两个RDD中的所有元素进行笛卡尔积操作
 8 |   */
 9 | object Cartesian {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("map")
13 |     val sc = new SparkContext(conf)
14 | 
15 |     val rdd1 = sc.parallelize(1 to 3)
16 |     val rdd2 = sc.parallelize(2 to 5)
17 |     val cartesianRDD = rdd1.cartesian(rdd2)
18 | 
19 |     cartesianRDD.foreach(x => println(x + " "))
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/Coalesce.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/9/16.
 7 |   * 对RDD的分区进行重新分区，shuffle默认值为false,当shuffle=false时，不能增加分区数
 8 |     目,但不会报错，只是分区个数还是原来的
 9 |   */
10 | object Coalesce {
11 | 
12 |   def main(args: Array[String]) {
13 |     val conf = new SparkConf().setMaster("local").setAppName("map")
14 |     val sc = new SparkContext(conf)
15 |     val rdd = sc.parallelize(1 to 16,4)
16 |     rdd.foreachPartition(iter => print(iter.toList+ " | "))
17 |     val coalesceRDD = rdd.coalesce(3)   //当suffle的值为false时，不能增加分区数(如分区数不能从5->7)
18 |    // val coalesceRDD = rdd.coalesce(5,true)
19 |     println("重新分区后的分区个数:"+coalesceRDD.partitions.size)
20 |     println("RDD依赖关系:"+coalesceRDD.toDebugString)
21 |     coalesceRDD.foreachPartition(iter => print(iter.toList+ " | "))
22 |     sc.stop
23 |   }
24 | }
25 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/Distinct.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/8/16.
 7 |   * 对RDD中的元素进行去重
 8 |   */
 9 | object Distinct {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("map")
13 |     val sc = new SparkContext(conf)
14 |     val list = List(1,1,2,5,2,9,6,1)
15 |     val distinctRDD = sc.parallelize(list)
16 |     val unionRDD = distinctRDD.distinct() //union   intersection
17 |     unionRDD.collect.foreach(x => print(x + " "))
18 |     sc.stop()
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/FlatMap.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/7/16.
 7 |   * 与map类似，但每个元素输入项都可以被映射到0个或多个的输出项，最终将结果”扁平化“后输出
 8 |   */
 9 | object FlatMap {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("flatmap")
13 |     val sc = new SparkContext(conf)
14 |     val rdd = sc.parallelize(1 to 5)
15 |     val fm = rdd.flatMap(x => (1 to x))
16 |     fm.foreach( x => print(x + " "))
17 |     sc.stop()
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/Glom.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/9/16.
 7 |   * 将RDD的每个分区中的类型为T的元素转换换数组Array[T]
 8 |   */
 9 | object Glom {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("map")
13 |     val sc = new SparkContext(conf)
14 |     val rdd = sc.parallelize(1 to 16,4)
15 |     val glomRDD = rdd.glom()   //RDD[Array[T]]
16 |     glomRDD.foreach(rdd => println(rdd.getClass.getSimpleName))
17 |     sc.stop
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/MakeRDD.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by Mobin on 2017/7/28.
 7 |   */
 8 | object MakeRDD {
 9 |   def main(args: Array[String]) {
10 |     val conf = new SparkConf().setMaster("local").setAppName("makeRDD")
11 |     val sc = new SparkContext(conf)
12 |     val collection = Seq((1 to 10, Seq("master","slave1")),
13 |       (11 to 15, Seq("slave2","slave3")))
14 |     var rdd = sc.makeRDD(collection)
15 |     println(rdd.partitions.size)
16 |     println(rdd.preferredLocations(rdd.partitions(0)))
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/Map.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/7/16.
 7 |   * 数据集中的每个元素经过用户自定义的函数转换形成一个新的RDD，新的RDD叫MappedRDD
 8 |   */
 9 | object Map {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("map")
13 |     val sc = new SparkContext(conf)
14 |     val rdd = sc.parallelize(1 to 10)  //创建RDD
15 |     val map = rdd.map(_*2)       //对RDD中的每个元素都乘于2
16 |     map.foreach(x => print(x+" "))
17 |     sc.stop()
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/MapPartitions.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/7/16.
 7 |   * mapPartitions:类似与map，map作用于每个分区的每个元素，但mapPartitions作用于每个分区
 8 |   *
 9 |   * mapPartitionsWithIndex:将[mapPartitionsWithIndex]注释部分去掉即是
10 |   * 与mapPartitions类似，不同的时函数多了个分区索引的参数
11 |   */
12 | object MapPartitions {
13 |  // 输出有女性的名字：
14 |   def partitionsFun(/*[mapPartitionsWithIndex]  index : Int,*/iter : Iterator[(String,String)]) : Iterator[String] = {
15 |     var woman = List[String]()
16 |     while (iter.hasNext){
17 |       val next = iter.next()
18 |       next match {
19 |         case (_,"female") => woman = /*[mapPartitionsWithIndex]"["+index+"]"+*/next._1 :: woman
20 |         //case (_,"female") => woman =  next._1.toList .:: (woman)  错误写法
21 |         case _ =>
22 |       }
23 |     }
24 |     return  woman.iterator
25 |   }
26 | 
27 | 
28 |   def main(args: Array[String]) {
29 |     val conf = new SparkConf().setMaster("local").setAppName("flatmap")
30 |     val sc = new SparkContext(conf)
31 |     val l = List(("kpop","female"),("zorro","male"),("mobin","male"),("lucy","female"))
32 |     val rdd = sc.parallelize(l,2)
33 |    // val mp = rdd.mapPartitions(x => x.filter(_._2 == "female")).map(x => x._1)
34 |     val mp = rdd.mapPartitions(partitionsFun)
35 |     //[mapPartitionsWithIndex]  val mp = rdd.mapPartitionsWithIndex(partitionsFun)
36 |     mp.collect.foreach(x => (print(x +" "))) //将分区中的元素转换成Aarray再输出
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/MapPartitionsWithIndex.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by Mobin on 2017/7/29.
 7 |   */
 8 | object MapPartitionsWithIndex {
 9 | 
10 |   def mappartitionWithIndexFun(x : Int, iter :Iterator[Int])={
11 |     var result = List[String]()
12 |     var i = 0
13 |     while (iter.hasNext) {
14 |       i += iter.next()
15 |     }
16 |     result.::(x + "|" + i).iterator
17 |   }
18 | 
19 |   def main(args: Array[String]) {
20 |     val conf = new SparkConf().setMaster("local").setAppName("mappartitionsWithIndex")
21 |     val sc = new SparkContext(conf)
22 |     val rdd1 = sc.makeRDD(1 to 5,2)
23 |     val rdd2 = rdd1.mapPartitionsWithIndex{
24 |       (x, iter) => {
25 |         var result = List[String]()
26 |         var i = 0
27 |         while (iter.hasNext){
28 |           i += iter.next()
29 |         }
30 |         result.::(x + "|" + i).iterator
31 |       }
32 |     }
33 |     val rdd3 = rdd1.mapPartitionsWithIndex(mappartitionWithIndexFun)
34 |     rdd3.foreach(println)
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/RandomSplit.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/9/16.
 7 |   * 根据weight权重值将一个RDD划分成多个RDD,权重越高划分得到的元素较多的几率就越大
 8 |   */
 9 | object RandomSplit {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local").setAppName("map")
13 |     val sc = new SparkContext(conf)
14 |     val rdd = sc.parallelize(1 to 10)
15 |     val randomSplitRDD = rdd.randomSplit(Array(1.0,2.0,7.0))
16 |     randomSplitRDD(0).foreach(x => print(x +" gg"))
17 |     randomSplitRDD(1).foreach(x => print(x +" rr"))
18 |     randomSplitRDD(2).foreach(x => print(x +" tt"))
19 |     sc.stop
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/Sample.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/7/16.
 7 |   */
 8 | object Sample {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("map")
12 |     val sc = new SparkContext(conf)
13 |     val rdd = sc.parallelize(1 to 10)
14 |     val sample1 = rdd.sample(true,0.5,0)
15 |     sample1.collect.foreach(x => print(x + " "))
16 |     sc.stop
17 |   }
18 | 
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/Union.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkConf, SparkContext}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/8/16.
 7 |   * :将两个RDD中的数据集进行合并，最终返回两个RDD的并集，若RDD中存在相同的元素也不会去重
 8 |   */
 9 | object Union {
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("map")
12 |     val sc = new SparkContext(conf)
13 |     val rdd1 = sc.parallelize(1 to 4)
14 |     val rdd2 = sc.parallelize(3 to 5)
15 |     val unionRDD = rdd1.intersection(rdd2) //union  intersection
16 |     unionRDD.collect.foreach(x => print(x + " "))
17 |     sc.stop()
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/BaseRDD/ZipWithIndex.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.BaseRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by Mobin on 2017/7/29.
 7 |   */
 8 | object ZipWithIndex {
 9 |   def main(args: Array[String]) {
10 |     val conf = new SparkConf().setMaster("local").setAppName("ZipWithIndex")
11 |     val sc = new SparkContext(conf)
12 |     val rdd1 = sc.makeRDD(Seq("A","B","C","D","E","F"),2)
13 |     rdd1.zipWithIndex().foreach(println)
14 |     rdd1.zipWithUniqueId().foreach(println)
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/AggregateAndFold.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by Mobin on 2017/7/30.
 7 |   */
 8 | object AggregateAndFold {
 9 |   def main(args: Array[String]) {
10 |     val conf = new SparkConf().setMaster("local").setAppName("AggregateFold")
11 |     val sc = new SparkContext(conf)
12 |     val rdd1 = sc.makeRDD(1 to 10, 2)
13 |     val rs = rdd1.aggregate(1)(
14 |       (x,y) => x + y,
15 |       (a,b) => a+ b
16 |     )
17 |     val rs1 = rdd1.fold(1)((x,y) => x+ y)
18 |     println(rs)
19 |     println(rs1)
20 |   }
21 | }
22 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/Cogroup.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/12/16.
 7 |   */
 8 | object Cogroup {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("ReduceByKey")
12 |     val sc = new SparkContext(conf)
13 |     val arr = List(("A", 1), ("B", 2), ("A", 2), ("B", 3))
14 |     val arr1 = List(("A", "A1"), ("B", "B1"), ("A", "A2"), ("B", "B2"))
15 |     val rdd = sc.parallelize(arr, 3)
16 |     val rdd1 = sc.parallelize(arr1, 3)
17 |     val groupByKeyRDD = rdd.cogroup(rdd1)
18 |     groupByKeyRDD.foreach(println)
19 |     println(groupByKeyRDD.toDebugString)
20 |     sc.stop
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/CombineByKey.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{HashPartitioner, SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/11/16.
 7 |   * 统计男性和女生的个数，并以（性别，（名字，名字....），个数）的形式输出
 8 |   */
 9 | object CombineByKey {
10 | 
11 |   def main(args: Array[String]) {
12 |     /*
13 |     def createCombine = (x: String) => (x, 1)
14 |     def mergerValue = (peo: (String, Int), x: String) => (peo._1+","+x, peo._2 + 1)
15 |     def mergeCombine = (sex1: (String, Int), sex2: (String, Int)) => (sex1._1 +","+ sex2._1, sex1._2 + sex2._2)*/
16 |     val conf = new SparkConf().setMaster("local").setAppName("combinByKey")
17 |     val sc = new SparkContext(conf)
18 |     val people = List(("male", "Mobin"), ("male", "Kpop"), ("female", "Lucy"), ("male", "Lufei"), ("female", "Amy"))
19 |     val rdd = sc.parallelize(people)
20 |     val combinByKeyRDD = rdd.combineByKey(
21 |       (x: String) => (List(x), 1),
22 |       (peo: (List[String], Int), x: String) => (x :: peo._1, peo._2 + 1),
23 |       (sex1: (List[String], Int), sex2: (List[String], Int)) => (sex1._1 ::: sex2._1, sex1._2 + sex2._2))
24 | 
25 |     combinByKeyRDD.foreach(println)
26 |     println(combinByKeyRDD.toDebugString)
27 | 
28 |     /**
29 |       * (1) ShuffledRDD[1] at combineByKey at CombineByKey.scala:20 []
30 |       * +-(1) ParallelCollectionRDD[0] at parallelize at CombineByKey.scala:19 []
31 |       */
32 |     sc.stop()
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/CombineByKey1.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/11/16.
 7 |   */
 8 | object CombineByKey1 {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("combinByKey")
12 |     val sc = new SparkContext(conf)
13 |     val rdd = sc.parallelize(List(
14 |       ("A", 3), ("A", 9), ("A", 12), ("A", 0), ("A", 5), ("B", 4),
15 |       ("B", 10), ("B", 11), ("B", 20), ("B", 25), ("C", 32), ("C", 91),
16 |       ("C", 122), ("C", 3), ("C", 55)), 2)
17 | 
18 |     val combineByKeyRDD = rdd.combineByKey(
19 |       (x: Int) => (x, 1),
20 |       (acc: (Int, Int), x) => (acc._1 + x, acc._2 + 1),
21 |       (acc1: (Int, Int), acc2: (Int, Int)) => (acc1._1 + acc2._1, acc1._2 + acc2._2))
22 | 
23 |     combineByKeyRDD.foreach(println)
24 |     sc.stop()
25 |   }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/FlatMapValus.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/10/16.
 7 |   * 对[K,V]型数据中的V值flatmap操作
 8 |   */
 9 | object FlatMapValus {
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("map")
12 |     val sc = new SparkContext(conf)
13 |     val list = List(("mobin",22),("kpop",20),("lufei",23))
14 |     val rdd = sc.parallelize(list)
15 |     val mapValuesRDD = rdd.flatMapValues(x => Seq(x,"male"))
16 |     mapValuesRDD.foreach(println)
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/FoldByKey.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/11/16.
 7 |   */
 8 | object FoldByKey {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("FoldByKey")
12 |     val sc = new SparkContext(conf)
13 |     val people = List(("Mobin", 2), ("Mobin", 1), ("Lucy", 2), ("Amy", 1), ("Lucy", 3))
14 |     val rdd = sc.parallelize(people)
15 |     val foldByKeyRDD = rdd.foldByKey(2)(_ + _)
16 |     foldByKeyRDD.foreach(println)
17 |     sc.stop
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/GroupByKey.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/12/16.
 7 |   */
 8 | object GroupByKey {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("ReduceByKey")
12 |     val sc = new SparkContext(conf)
13 |     val arr = List(("A", 1), ("B", 2), ("A", 2), ("B", 3))
14 |     val rdd = sc.parallelize(arr)
15 |     val groupByKeyRDD = rdd.groupByKey()
16 |     groupByKeyRDD.foreach(println)
17 |     sc.stop
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/Join.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/12/16.
 7 |   */
 8 | object Join {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("ReduceByKey")
12 |     val sc = new SparkContext(conf)
13 |     val arr = List(("A", 1), ("B", 2))
14 |     val arr1 = List(("A", "A1"), ("B", "B1"),("B", "B1"))
15 | 
16 |     /*val arr = List(("A", 1), ("B", 2), ("A", 2), ("B", 3),("C",1))
17 |     val arr1 = List(("A", "A1"), ("B", "B1"), ("A", "A2"), ("B", "B2"))
18 |     leftOuterJoin
19 |     */
20 | 
21 |    /*val arr = List(("A", 1), ("B", 2), ("A", 2), ("B", 3))
22 |    val arr1 = List(("A", "A1"), ("B", "B1"), ("A", "A2"), ("B", "B2"),("C","C1"))
23 |    rightOuterJoin*/
24 |     val rdd = sc.parallelize(arr, 3)
25 |     val rdd1 = sc.parallelize(arr1, 3)
26 |     val rightOutJoinRDD = rdd.fullOuterJoin(rdd1)
27 |     rightOutJoinRDD.foreach(println)
28 |     println(rightOutJoinRDD.toDebugString)
29 |     sc.stop
30 |   }
31 | }
32 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/MapJoinJava.java:
--------------------------------------------------------------------------------
 1 | //package com.mobin.SparkRDDFun.TransFormation.KVRDD;
 2 | //
 3 | //import org.apache.spark.SparkConf;
 4 | //import org.apache.spark.SparkContext;
 5 | //import org.apache.spark.api.java.JavaRDD;
 6 | //import org.apache.spark.api.java.JavaSparkContext;
 7 | //import org.apache.spark.api.java.function.FlatMapFunction;
 8 | //import org.apache.spark.api.java.function.Function;
 9 | //import org.apache.spark.api.java.function.PairFunction;
10 | //import org.apache.spark.broadcast.Broadcast;
11 | //import scala.Tuple2;
12 | //
13 | //import java.util.ArrayList;
14 | //import java.util.Iterator;
15 | //import java.util.List;
16 | //import java.util.Map;
17 | //
18 | ///**
19 | // * Created by Mobin on 2016/11/14.
20 | // */
21 | //public class MapJoinJava {
22 | //    public static void main(String[] args) {
23 | //        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("JavaMapSide");
24 | //        JavaSparkContext sc = new JavaSparkContext(conf);
25 | //        JavaRDD<String> table =  sc.textFile("mapjoin.txt");
26 | //        JavaRDD<String> table1 =  sc.textFile("mapjoin1.txt");
27 | //
28 | //        final Map<String, String> pairs = table.mapToPair(new PairFunction<String, String, String>() {
29 | //            public Tuple2<String, String> call(String s) throws Exception {
30 | //                int pos = s.indexOf(",");
31 | //                return new Tuple2<String, String>(s.substring(0,pos), s.substring(pos + 1));
32 | //            }
33 | //        }).collectAsMap();
34 | //
35 | //       // final Broadcast<Map<String,String>> broadcast = sc.broadcast(pairs);
36 | //
37 | //        table1.mapToPair(new PairFunction<String, String, String>() {
38 | //            public Tuple2<String, String> call(String s) throws Exception {
39 | //                int pos = s.indexOf(",");
40 | //                return new Tuple2<String, String>(s.substring(0,pos), s.substring(pos + 1));
41 | //            }
42 | //        }).mapPartitions(new FlatMapFunction<Iterator<Tuple2<String,String>>, Tuple2<String,List<String>>>() {
43 | //            public Iterable<Tuple2<String, List<String>>> call(Iterator<Tuple2<String, String>> tuple2Iterator) throws Exception {
44 | //                List<Tuple2<String, List<String>>> list = null;
45 | //                List l = new ArrayList();
46 | //                while (tuple2Iterator.hasNext()){
47 | //                    Tuple2<String,String> map = tuple2Iterator.next();
48 | //                    if (pairs.containsKey(map._1)){
49 | //                        if(list == null)
50 | //                            list = new ArrayList();
51 | //
52 | //                            l.add(pairs.get(map._1));
53 | //                            l.add(map._2);
54 | //                            list.add(new Tuple2<String, List<String>>(map._1,l));
55 | //                    }
56 | //                }
57 | //                return list;
58 | //            }
59 | //        }).saveAsTextFile("javaMapJoin");
60 | //    }
61 | //
62 | //
63 | //}
64 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/MapSideJoin.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by Mobin on 2016/11/14.
 7 |   */
 8 | object MapSideJoin {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val  conf = new SparkConf().setMaster("local[2]").setAppName("Mobin")
12 |     val sc = new SparkContext(conf)
13 |     val table = sc.textFile("mapjoin.txt")
14 |     val table1 = sc.textFile("mapjoin1.txt")
15 |     val paisr = table.map{ x =>
16 |       var pos = x.indexOf(",")
17 |       (x.substring(0,pos),x.substring(pos+1))
18 |     }.collectAsMap()
19 | 
20 |     //    var broadcastMap = sc.broadcast(paisr)
21 | 
22 |     val result = table1.map{ x =>
23 |       var pos = x.indexOf(",")
24 |       (x.substring(0,pos),x.substring(pos + 1))
25 |     }.mapPartitions({ iter =>
26 |       //  var m = broadcastMap.value
27 |       for {
28 |         (key, value) <- iter
29 |         if paisr.contains(key)
30 |       }yield(key,(value , paisr.get(key).getOrElse("")))
31 |     })
32 | 
33 |     result.saveAsTextFile("result")
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/MapValues.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{HashPartitioner, SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/10/16.
 7 |   * 对[K,V]型数据中的V值map操作
 8 |   */
 9 | object MapValues {
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("map")
12 |     val sc = new SparkContext(conf)
13 |     val list = List(("mobin",22),("kpop",20),("lufei",23))
14 |     val rdd = sc.parallelize(list)
15 |     val mapValuesRDD = rdd.mapValues(x => Seq(x,"male"))
16 |     mapValuesRDD.foreach(println)
17 |   }
18 | }
19 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/PartitionBy.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{Partitioner, HashPartitioner, SparkContext, SparkConf}
 4 | import scala.collection.mutable.{Map}
 5 | 
 6 | /**
 7 |   * Created by hadoop on 4/10/16.
 8 |   */
 9 | object PartitionBy {
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("partitonby")
12 |     val sc = new SparkContext(conf)
13 |     val rdd1 = sc.makeRDD(Array((10,"A"), (20, "B"), (30,"C"), (40,"D")), 2)
14 |     rdd1.mapPartitionsWithIndex{
15 |       (partitionID, iter) => {
16 |         var partiton_map = Map[String, List[(Int, String)]]()
17 |         while(iter.hasNext){
18 |           val partition_name = "part_" + partitionID
19 |           var elem = iter.next()
20 |           if (partiton_map.contains(partition_name)){
21 |             var elems = partiton_map(partition_name)
22 |             elem :: elems
23 |           }else {
24 |             partiton_map(partition_name) = List[(Int, String)]{elem}
25 |           }
26 |         }
27 |         partiton_map.iterator
28 |       }
29 |     }
30 |     rdd1.foreach(println)
31 |     val rdd2 = rdd1.partitionBy(new HashPartitioner(2))
32 |     var rdd3 = rdd1.groupByKey(new Partitioner() {
33 |       override def numPartitions: Int = 10
34 | 
35 |       override def getPartition(key: Any): Int = {
36 | 
37 |         val id = key.asInstanceOf[Int]
38 |         println(id)
39 |         if (id % 2 ==0) {
40 |               id / 4
41 |         }else{
42 |            id % 4
43 |         }
44 |       }
45 |     })
46 |     rdd3.foreach(println)
47 |   }
48 | }
49 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/ReduceByKey.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/12/16.
 7 |   */
 8 | object ReduceByKey {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("ReduceByKey")
12 |     val sc = new SparkContext(conf)
13 |     val arr = List(("A",1),("B",2),("A",2),("B",3))
14 |     val rdd = sc.parallelize(arr)
15 |     val reduceByKeyRDD = rdd.reduceByKey(_ +_)
16 |     reduceByKeyRDD.foreach(println)
17 |     sc.stop
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkRDDFun/TransFormation/KVRDD/SortByKey.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkRDDFun.TransFormation.KVRDD
 2 | 
 3 | import org.apache.spark.{SparkContext, SparkConf}
 4 | 
 5 | /**
 6 |   * Created by hadoop on 4/12/16.
 7 |   */
 8 | object SortByKey {
 9 | 
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("ReduceByKey")
12 |     val sc = new SparkContext(conf)
13 |     val arr = List(("A",1),("B",2),("A",2),("B",3))
14 |     val rdd = sc.parallelize(arr)
15 |     val sortByKeyRDD = rdd.sortByKey()
16 |     sortByKeyRDD.foreach(println)
17 |     sc.stop
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkSQL/PeopleDemo.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkSQL
 2 | 
 3 | import org.apache.spark.sql.SQLContext
 4 | import org.apache.spark.{SparkContext, SparkConf}
 5 | 
 6 | /**
 7 |   * Created by Mobin on 2016/11/28.
 8 |   */
 9 | object PeopleDemo {
10 |   def main(args: Array[String]) {
11 |     val conf =new SparkConf().setAppName("people").setMaster("local")
12 |     val sc = new SparkContext(conf)
13 |     val sqlContext = new SQLContext(sc)
14 |     val df = sqlContext.jsonFile("people.json")
15 |     df.show()
16 |     df.printSchema()
17 |     printf("select name------")
18 |    df.select("name").show()
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkSQL/RowNumber.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkSQL
 2 | 
 3 | import org.apache.spark.sql.{DataFrame, SQLContext}
 4 | import org.apache.spark.{SparkContext, SparkConf}
 5 | 
 6 | /**
 7 |   * Created by Mobin on 2016/12/1.
 8 |   */
 9 | object RowNumber {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setAppName("rownum").setMaster("local")
13 |     val sc = new SparkContext(conf)
14 |     val sqlContext = new SQLContext(sc)
15 |     import sqlContext.implicits._
16 |     //  val dpiDF = sc.textFile("F:\\AGG_EVT_LTE_DPI_NEW.txt").map(x => x.split("\\|")).
17 |     //      filter(x => x.length >= 30 &&  x(14).toDouble > 0 &&  x(15).toDouble > 0 && x(3) != "" && x(18) != "").
18 |     //      map(x => DPI(x(3),x(18),x(14).toDouble,x(15).toDouble, x(14).toDouble+x(15).toDouble)).toDF()
19 |     //
20 |     //    dpiDF.registerTempTable("dpi")
21 |     //    //dpiDF.groupBy("MDN").agg("size_ul" -> "sum","size_dl" -> "sum")
22 |     //   // dpiDF.select("APP").groupBy("APP").count().select("count").show()
23 |     // //  sqlContext.sql("SELECT MDN,APP,size_ul,size_dl,sum(s) FROM dpi").show()
24 |     //    dpiDF.printSchema()
25 |     //  //  val sDF = dpiDF.groupBy("MDN","APP").agg("s" -> "sum").registerTempTable("tmp")//每个用户对应的的APP的流量q
26 |     //  sqlContext.sql("SELECT MDN,s,COUNT(1) FROM dpi GROUP BY s").show()
27 |     // //   sDF.groupBy("MDN").agg("SUM(s)" -> "sum")
28 | //    println("count.....")
29 | //    val acc = sc.accumulator(0, "ac")
30 | //    sc.textFile("/DATA/PUBLIC/NOCE/SGC/SGC_LTE_CDR_DAY/day=20161125/00*").foreach(
31 | //      line => if(line.length > 0)  acc += 1
32 | //    )
33 |     //println("line:" + acc.value)
34 | 
35 |   //  dpiDF.show()
36 | //    val mr = sc.textFile("E:\\DATA\\PUBLIC\\NOCE\\ETL\\ETL_4G_MRO_ERS\\20161020\\2016102011\\e_p_3_1.txt")
37 | //      .map(x => (x.split("\\|")(3),2.10))
38 | //      .filter(x => x!="").distinct().toDF()
39 | //    val chr = sc.textFile("E:\\DATA\\PUBLIC\\NOCE\\AGG\\AGG_MRO_CHR_RELATE\\day=20161020\\hour=2016102011\\vendor=ERS\\10\\agg_data_172_17_1_2_ad7fc9ad_3930_4da8_97cc_a2a476f2333f.txt")
40 | //      .map(x => (x.split("\\|")(1),1.8)).filter(x => x !="").distinct().toDF()
41 | //
42 | //    val rs = mr.unionAll(chr).count()
43 | //    println(rs)
44 | //    sc.stop()
45 | 
46 |     sc.textFile("F:\\m_p_50_3.txt.lzo").map(x => x.split(",")(0)).foreach(
47 |       println(_)
48 |     )
49 | 
50 |     sc.stop()
51 | 
52 |   }
53 | }
54 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/SparkSQL/SGC_LET_SHOOL_HOUR.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.SparkSQL
 2 | 
 3 | import org.apache.spark.sql.{DataFrame, SQLContext}
 4 | import org.apache.spark.{SparkContext, SparkConf}
 5 | 
 6 | /**
 7 |   * Created by Mobin on 2016/11/28.
 8 |   */
 9 | object SGC_LET_SHOOL_HOUR {
10 | 
11 |   case  class  School(school_name: String,  school_id: String, enodeb: Int)
12 |   case  class  Mr(enodebID: Int, MSISDN: String)
13 | 
14 |   def main(args: Array[String]) {
15 |     val conf = new SparkConf().setAppName("SGC_LET_SCHOOL_HOUR").setMaster("local")
16 |     val sc = new SparkContext(conf)
17 |     val sqlContext = new SQLContext(sc)
18 |     import sqlContext.implicits._
19 |     val school: DataFrame = sc.textFile("E:\\DATA\\PUBLIC\\NOCE\\school.csv").map(x => x.split("\\|")).map(s => School(s(1), s(3),Integer.parseInt(s(5)))).toDF()
20 |     val mr = sc.textFile("F:\\2.10.txt").map(s => s.split("\\|")).map(mr => Mr(Integer.parseInt(mr(1)),mr(11))).toDF()
21 |     school.registerTempTable("school")
22 |     mr.registerTempTable("mr")
23 |     school.select("school_name")
24 |     val joinDF = school.join(mr,$"enodeb" === $"enodebID").select("school_id","school_name","MSISDN").distinct
25 |     val countDF  = joinDF.select("school_id","school_name").groupBy("school_id","school_name")
26 |     countDF.count().rdd.saveAsTextFile("F:\\SCHOOL.txt")
27 | 
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/Telecom/AirPlaneMode.scala:
--------------------------------------------------------------------------------
  1 | package com.mobin.Telecom
  2 | 
  3 | import java.text.SimpleDateFormat
  4 | 
  5 | import org.apache.hadoop.conf.Configuration
  6 | import org.apache.hadoop.fs._
  7 | import org.apache.spark.{SparkContext, SparkConf}
  8 | 
  9 | /**
 10 |   * Created by Mobin on 2017/7/15.
 11 |   * 飞行模式计算
 12 |   */
 13 | object AirPlaneMode {
 14 |   private val MSISDN = 6
 15 |   private val IDENTIFICATION = 8
 16 |   private val STARTTIME = 0
 17 |   private val ENB = 12
 18 |   private val MAX_NUM = 13
 19 |   private val chrSourcePath = "/DATA/PUBLIC/NOCE/ETL/ETL_CHR_L_MM/"
 20 | 
 21 |   def airPlainModeMain(fs: FileSystem, sc: SparkContext, day: String, dateTime :String, isPersist: Boolean): Unit ={
 22 |     val source = chrSourcePath + day + "/" + "*/*"
 23 |     print(source)
 24 |     val paresRdd = sc.textFile(source).mapPartitions(iterFunc)
 25 |     val splicRdd = paresRdd.reduceByKey(reduceByKeyFun)
 26 |     val statisticRdd = splicRdd.mapPartitions(statisticFun)
 27 |     statisticRdd.count()
 28 |   }
 29 | 
 30 |   def statisticFun(iter: Iterator[(String, (String,Int))]) = {
 31 |     var list = List[(String, String)]()
 32 |     while(iter.hasNext) {
 33 |       var sum = 0
 34 |       val data = iter.next()
 35 |       val str = data._2._1.split("\\|")
 36 |       for (m <- str) {
 37 |         val str1 = m.split(",")   //分隔出<时间，编号>
 38 |         if (str1.length == 2 && "0".equals(str1(1))) {
 39 |           for (n <- str) {
 40 |             val str2 = n.split(",")  //分隔出<时间， 编号>
 41 |             if (str2.length == 2 && ("1".equals(str2(1)) || "2".equals(str2(1)))) {
 42 |               val t1 =paresTime(str2(0))
 43 |               val t2 = paresTime(str1(0))
 44 |               if (!t1.isEmpty && !t2.isEmpty && t1.get - t2.get < 2000 && t1.get - t2.get > 0){
 45 |                 sum = sum +1
 46 |               }
 47 |             }
 48 |           }
 49 |         }
 50 |       }
 51 |       println(data._1, sum + "," + data._2._2)
 52 |       list = (data._1, sum + "," + data._2._2) :: list
 53 |     }
 54 |     list.iterator
 55 |   }
 56 | 
 57 |   def paresTime(time: String) : Option[Long] = {
 58 |     val timeFormat = "yyyy-MM-dd HH:mm:ss SSS"
 59 |     val month = time.substring(5, 8)
 60 |     var t = time
 61 |     if (!month.contains("-")){
 62 |       month match {
 63 |         case "Jan" => t = t.replace(month, "01")
 64 |         case "Feb" => t = t.replace(month, "02")
 65 |         case "Mar" => t = t.replace(month, "03")
 66 |         case "Apr" => t = t.replace(month, "04")
 67 |         case "May" => t = t.replace(month, "05")
 68 |         case "Jun" => t = t.replace(month, "06")
 69 |         case "Jul" => t = t.replace(month, "07")
 70 |         case "Aug" => t = t.replace(month, "08")
 71 |         case "Sep" => t = t.replace(month, "09")
 72 |         case "Oct" => t = t.replace(month, "10")
 73 |         case "Nov" => t = t.replace(month, "11")
 74 |         case "Dec" => t = t.replace(month, "12")
 75 |         case _ => None
 76 |       }
 77 |     }
 78 |     var startTime: Option[Long] = None
 79 |     try {
 80 |         startTime = Some(new SimpleDateFormat(timeFormat).parse(time).getTime)
 81 |         return startTime
 82 |     }catch {
 83 |       case  e: Exception => None
 84 |     }
 85 |   }
 86 | 
 87 |   def reduceByKeyFun(x1: (String, Int), x2: (String, Int)): (String,Int) = {
 88 |     val sum = x1._2 + x2._2
 89 |     println(x1._1 + "ppp")
 90 |     if (",".equals(x1._1)){
 91 |       if (!",".equals(x2._1)){
 92 |           (x2._1, sum)
 93 |       } else {
 94 |         ("", sum)
 95 |       }
 96 |     } else {
 97 |       if (!",".equals(x2._1)) {
 98 |         (x1._1 + "|" + x2._1, sum)
 99 |       } else {
100 |         (x1._1, sum)
101 |       }
102 |     }
103 |   }
104 | 
105 | 
106 |   def  iterFunc(iter: Iterator[String]) = {
107 |     var list = List[(String, (String, Int))]()
108 |     while (iter.hasNext ) {
109 |       val str = iter.next().split(",")
110 |       val enb:String = str(ENB)
111 |       val mdn = str(MSISDN)
112 |       val time = str(STARTTIME)
113 |       val airplane = str(IDENTIFICATION)
114 |       var tp = ""
115 |       airplane match {
116 |         case _ if "0x05".equals(airplane) =>  tp = time + ",0"
117 |         case _ if "0x00".equals(airplane) || "0x18".equals(airplane) => tp = time + ",1"
118 |         case _ => "" + ","
119 |       }
120 |       val enb_mdn = String.format("%s,%s", mdn, String.valueOf(Integer.parseInt(enb.substring(3), 16)))
121 |        list = (enb_mdn, (tp, 1))::list
122 |     }
123 |     list.iterator
124 |   }
125 | 
126 |   def main(args: Array[String]) {
127 |     val conf = new SparkConf().setAppName("airPlainMode").setMaster("local")
128 |     val sc = new SparkContext(conf)
129 |     val configuration = new Configuration()
130 |     val fs = FileSystem.newInstance(configuration)
131 |     airPlainModeMain(fs, sc, "20170322", "", false)
132 |   }
133 | }
134 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/FileStreaming.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.streaming.{Seconds, StreamingContext}
 5 | 
 6 | /**
 7 |   * Created by hadoop on 3/29/16.
 8 |   */
 9 | object FileStreaming {
10 |   def main(args: Array[String]) {
11 |     val conf = new SparkConf().setMaster("local").setAppName("FileStreaming")
12 |     val sc = new StreamingContext(conf,Seconds(5))
13 |     val lines = sc.textFileStream("/home/hadoop/word")
14 |     val words = lines.flatMap(_.split(" "))
15 |     val wordCounts = words.map(x => (x , 1)).reduceByKey(_ + _)
16 |     sc.start()
17 |     sc.awaitTermination()
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/Flume/SampleLogGenerator.java:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming.Flume;
 2 | 
 3 | import java.io.*;
 4 | 
 5 | /**
 6 |  * Created with IDEA
 7 |  * Creater: MOBIN
 8 |  * Date: 2018/8/2
 9 |  * Time: 4:39 PM
10 |  */
11 | public class SampleLogGenerator {
12 |     public static void main(String[] args) throws IOException, InterruptedException {
13 |             String location = "/Users/mobin/Downloads/access_log/access1_log";
14 |             File f = new File(location);
15 |             FileOutputStream writer = new FileOutputStream(f);
16 |             File read = new File("/Users/mobin/Downloads/access_log/access_log");
17 |             BufferedReader reader = new BufferedReader(new FileReader(read));
18 |             for(;;){
19 |                 System.out.println("....");
20 |                 writer.write((reader.readLine() + "\n").getBytes());
21 |                 writer.flush();
22 |                 Thread.sleep(500);
23 |             }
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/Flume/ScalaLoadDistributedEvents.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming.Flume
 2 | 
 3 | import java.io.ObjectOutputStream
 4 | import java.net.InetSocketAddress
 5 | 
 6 | import org.apache.spark.SparkConf
 7 | import org.apache.spark.rdd.RDD
 8 | import org.apache.spark.storage.StorageLevel
 9 | import org.apache.spark.streaming.dstream.DStream
10 | import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}
11 | import org.apache.spark.streaming.{Seconds, StreamingContext}
12 | 
13 | /**
14 |   * Created with IDEA
15 |   * Creater: MOBIN
16 |   * Date: 2018/8/2
17 |   * Time: 3:22 PM
18 |   */
19 | object ScalaLoadDistributedEvents {
20 | 
21 |   def main(args: Array[String]): Unit = {
22 |     println("Creating Spark Configuration")
23 |     val conf = new SparkConf().setMaster("local[2]").setAppName("streaimg data loading App");
24 |     println("Retreivinf Streaming Context from Spark Conf")
25 |     val streamContext = new StreamingContext(conf, Seconds(2))
26 | 
27 |     //创建一个包含所有机器地址和端口的InetSocketAdress数组
28 |     var address = new Array[InetSocketAddress](1)
29 |     address(0) = new InetSocketAddress("localhost",9998)
30 | 
31 |     //创建一个Flume轮询流，每隔2s从Sink中拉取事件
32 |     //1. maxBatchSize:单个RPC中从Spakr Sink中拉取事件的最大数目
33 |     //2. 这个Stream发送给Sink的并发请求数目T
34 |     val flumeStream = FlumeUtils.createPollingStream(streamContext, address, StorageLevel.MEMORY_AND_DISK_SER_2,1000, 1)
35 | 
36 |     val outputStream = new ObjectOutputStream(Console.out)
37 |     printValues(flumeStream, streamContext, outputStream)
38 |     streamContext.start()
39 |     streamContext.awaitTermination()
40 |   }
41 | 
42 | 
43 |   def printValues(stream: DStream[SparkFlumeEvent], context: StreamingContext, outputStream: ObjectOutputStream): Unit ={
44 |     stream.foreachRDD(foreachFunc)
45 |     def foreachFunc = (rdd: RDD[SparkFlumeEvent]) => {
46 |       val array = rdd.collect()
47 |       println("Start Printing Results")
48 |       println("Total size of Events = " + array.size)
49 |       for (flumeEvent <- array){
50 |         //从SparkFlumeEvent得到AvorFlumeEvent
51 |         val payLoad = flumeEvent.event.getBody
52 |         println(new String(payLoad.array()))
53 |       }
54 |       println("finish......")
55 |     }
56 |   }
57 | }
58 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/Flume/ScalaLogAnalyzerJson.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming.Flume
 2 | 
 3 | import java.util.regex.{Matcher, Pattern}
 4 | 
 5 | import com.alibaba.fastjson.JSON
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | 
12 | /**
13 |   * Created with IDEA
14 |   * Creater: MOBIN
15 |   * Date: 2018/8/4
16 |   * Time: 2:36 PM
17 |   */
18 | class ScalaLogAnalyzerJson extends Serializable {
19 | 
20 |   def tansformLogDataIntoJson(logLine: String): String = {
21 |     val LOG_ENTRY_PATTERN = """^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) (\S+)" (\d{3}) (\S+)"""
22 |     val PATTERN = Pattern.compile(LOG_ENTRY_PATTERN)
23 |     val matcher = PATTERN.matcher(logLine)
24 | 
25 |     if (!matcher.find()){
26 |       println("Cannot parse logline" + logLine)
27 |     }
28 |     import scala.collection.JavaConversions._
29 |     val json = scala.util.parsing.json.JSONObject(createDataMap(matcher)).toString()
30 |     println(json)
31 |     return json
32 |   }
33 | 
34 |   def createDataMap(matcher: Matcher): Map[String, String] = {
35 |     Map[String, String](
36 |       ("IP" -> matcher.group(1)),
37 |       ("client" -> matcher.group(2)),
38 |       ("user" -> matcher.group(3)),
39 |       ("date" -> matcher.group(4)),
40 |       ("method" -> matcher.group(5)),
41 |       ("request" -> matcher.group(6)),
42 |       ("protocol" -> matcher.group(7)),
43 |       ("respCode" -> matcher.group(8)),
44 |       ("size" -> matcher.group(9))
45 |     )
46 |   }
47 | 
48 | }
49 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/Flume/ScalaLogAnalyzerMap.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming.Flume
 2 | 
 3 | import java.util.regex.{Matcher, Pattern}
 4 | 
 5 | 
 6 | 
 7 | /**
 8 |   * Created with IDEA
 9 |   * Creater: MOBIN
10 |   * Date: 2018/8/4
11 |   * Time: 2:36 PM
12 |   */
13 | class ScalaLogAnalyzerMap extends Serializable {
14 | 
15 |   def tansformLogData(logLine: String): Map[String, String] = {
16 |     val LOG_ENTRY_PATTERN = """^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) (\S+)" (\d{3}) (\S+)"""
17 |     val PATTERN = Pattern.compile(LOG_ENTRY_PATTERN)
18 |     val matcher = PATTERN.matcher(logLine)
19 | 
20 |     if (!matcher.find()){
21 |       println("Cannot parse logline" + logLine)
22 |     }
23 |     createDataMap(matcher)
24 |   }
25 | 
26 |   def createDataMap(matcher: Matcher): Map[String, String] = {
27 |     Map[String, String](
28 |       ("IP" -> matcher.group(1)),
29 |       ("client" -> matcher.group(2)),
30 |       ("user" -> matcher.group(3)),
31 |       ("date" -> matcher.group(4)),
32 |       ("method" -> matcher.group(5)),
33 |       ("request" -> matcher.group(6)),
34 |       ("protocol" -> matcher.group(7)),
35 |       ("respCode" -> matcher.group(8)),
36 |       ("size" -> matcher.group(9))
37 |     )
38 |   }
39 | 
40 | }
41 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/Flume/ScalaQueryingStreams.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming.Flume
 2 | 
 3 | import java.net.InetSocketAddress
 4 | 
 5 | import org.apache.spark.{SparkConf, SparkContext}
 6 | import org.apache.spark.rdd.RDD
 7 | import org.apache.spark.sql.SQLContext
 8 | import org.apache.spark.storage.StorageLevel
 9 | import org.apache.spark.streaming.dstream.DStream
10 | import org.apache.spark.streaming.flume.FlumeUtils
11 | import org.apache.spark.streaming.{Seconds, StreamingContext}
12 | 
13 | /**
14 |   * Created with IDEA
15 |   * Creater: MOBIN
16 |   * Date: 2018/8/4
17 |   * Time: 2:49 PM
18 |   */
19 | object ScalaQueryingStreams {
20 |   def main(args: Array[String]): Unit = {
21 |     val conf = new SparkConf().setAppName("Apache Log Transformer")
22 |     val sparkContext = new SparkContext(conf)
23 |     val streamCtx = new StreamingContext(sparkContext, Seconds(10))
24 | 
25 |     var address = new Array[InetSocketAddress](1)
26 |     address(0) = new InetSocketAddress("localhost", 9998)
27 |     val flumeStream = FlumeUtils.createPollingStream(streamCtx, address, StorageLevel.MEMORY_AND_DISK_SER_2, 1000, 1)
28 |     val transformLog = new ScalaLogAnalyzerJson()
29 |     val newDStream = flumeStream.map{
30 |       x => transformLog.tansformLogDataIntoJson(new String(x.event.getBody.array()))
31 |     }
32 |     val wStream = newDStream.window(Seconds(40), Seconds(20))
33 |     wStream.foreachRDD{
34 |       rdd =>
35 |         val sqlCtx = getInstance(sparkContext)
36 |         //通过JSONRDD将 JSONRDD转换为SQL DataFrame
37 |         val df = sqlCtx.jsonRDD(rdd)
38 |         df.registerTempTable("apacheLogData")
39 |         //打印结构类型
40 |         df.printSchema()
41 |         val logDataFrame = sqlCtx.sql("SELECT method,count(*) as total FROM apacheLogData GROUP BY method")
42 |         logDataFrame.show()
43 |     }
44 | 
45 |     streamCtx.start()
46 |     streamCtx.awaitTermination()
47 | 
48 |   }
49 | 
50 | 
51 |   def executeTransformations(dstream: DStream[(String, String)], context: StreamingContext): Unit ={
52 |     printLogValues(dstream,context)
53 |     println("++++++")
54 |     dstream.filter(x => x._1.equals("method") && x._2.contains("GET")).count().print()
55 |     println("++++++")
56 | 
57 |   }
58 | 
59 |   def printLogValues(stream: DStream[(String, String)], context: StreamingContext){
60 |     stream.foreachRDD(foreachFunc)
61 | 
62 |     def foreachFunc = (rdd: RDD[(String,String)]) => {
63 |       val array = rdd.collect()
64 |       for (dataMap <- array.array){
65 |         println(dataMap._1 + "------" + dataMap._2)
66 |       }
67 |     }
68 |   }
69 | 
70 |   @transient private var instance: SQLContext = null
71 | 
72 |   //延迟初始化SQLContext
73 |   def getInstance(sparkContext: SparkContext): SQLContext =
74 |     synchronized{
75 |       if (instance == null) {
76 |         instance = new SQLContext(sparkContext)
77 |       }
78 |       instance
79 |     }
80 | }
81 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/Flume/ScalaTransformLogEvents.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming.Flume
 2 | 
 3 | import java.net.InetSocketAddress
 4 | 
 5 | import org.apache.spark.SparkConf
 6 | import org.apache.spark.rdd.RDD
 7 | import org.apache.spark.storage.StorageLevel
 8 | import org.apache.spark.streaming.dstream.DStream
 9 | import org.apache.spark.streaming.flume.FlumeUtils
10 | import org.apache.spark.streaming.{Seconds, StreamingContext}
11 | 
12 | /**
13 |   * Created with IDEA
14 |   * Creater: MOBIN
15 |   * Date: 2018/8/4
16 |   * Time: 2:49 PM
17 |   */
18 | object ScalaTransformLogEvents {
19 |   def main(args: Array[String]): Unit = {
20 |     val conf = new SparkConf().setMaster("local[2]").setAppName("Apache Log Transformer")
21 |     val streamCtx = new StreamingContext(conf, Seconds(10))
22 | 
23 |     var address = new Array[InetSocketAddress](1)
24 |     address(0) = new InetSocketAddress("localhost", 9998)
25 |     val flumeStream = FlumeUtils.createPollingStream(streamCtx, address, StorageLevel.MEMORY_AND_DISK_SER_2, 1000, 1)
26 |     val transformLog = new ScalaLogAnalyzerMap()
27 |     val newDStream = flumeStream.flatMap{
28 | 
29 |       x => transformLog.tansformLogData(new String(x.event.getBody.array()))
30 |     }
31 | 
32 |     println("------")
33 |     flumeStream.map(x => x.event.getHeaders).print()
34 |     println("------")
35 | 
36 | 
37 |     executeTransformations(newDStream, streamCtx)
38 |     streamCtx.start()
39 |     streamCtx.awaitTermination()
40 |   }
41 | 
42 |   def executeTransformations(dstream: DStream[(String, String)], context: StreamingContext): Unit ={
43 |     printLogValues(dstream,context)
44 |     println("++++++")
45 |     dstream.filter(x => x._1.equals("method") && x._2.contains("GET")).count().print()
46 |     println("++++++")
47 | 
48 |   }
49 | 
50 |   def printLogValues(stream: DStream[(String, String)], context: StreamingContext){
51 |     stream.foreachRDD(foreachFunc)
52 | 
53 |     def foreachFunc = (rdd: RDD[(String,String)]) => {
54 |       val array = rdd.collect()
55 |       for (dataMap <- array.array){
56 |         println(dataMap._1 + "------" + dataMap._2)
57 |       }
58 |     }
59 |   }
60 | }
61 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/GenerateChar.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming
 2 | 
 3 | import java.io.PrintWriter
 4 | import java.net.ServerSocket
 5 | 
 6 | /**
 7 |   * Created by hadoop on 3/28/16.
 8 |   */
 9 | object GenerateChar {
10 |   def generateContext(index : Int) : String = {
11 |     import scala.collection.mutable.ListBuffer
12 |     val charList = ListBuffer[Char]()
13 |     for(i <- 65 to 90)
14 |       charList += i.toChar
15 | 
16 |     val charArray = charList.toArray
17 |     charArray(index).toString
18 |   }
19 | 
20 |   def index = {
21 |     import java.util.Random
22 |     val rdm = new Random
23 |     rdm.nextInt(20)
24 |   }
25 | 
26 |   def main(args: Array[String]) {
27 |     val listener = new ServerSocket(9998)
28 |     println("开始监听...............")
29 |     while(true){
30 |       val socket = listener.accept()
31 |       new Thread(){
32 |         override def run() = {
33 |           println("Got client connected from :"+ socket.getInetAddress)
34 |           val out = new PrintWriter(socket.getOutputStream,true)
35 |           while(true){
36 |             Thread.sleep(500)
37 |             val context = generateContext(index)
38 |             println(context)
39 |             out.write(context + '\n')
40 |             out.flush()
41 |           }
42 |           socket.close()
43 |         }
44 |       }.start()
45 |     }
46 |   }
47 | }
48 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/Kafka/UserBehaviorMsgProducer.scala:
--------------------------------------------------------------------------------
 1 | //package com.mobin.sparkStreaming.Kafka
 2 | //import java.util.Properties
 3 | //
 4 | //import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}
 5 | //
 6 | //import scala.util.Random
 7 | //
 8 | ///**
 9 | //  * Created by Mobin on 2017/9/1.
10 | //  * 生产行为数据消息
11 | //  */
12 | //class UserBehaviorMsgProducer(brokers: String, topic: String) extends Runnable{
13 | //  private val brokerList = brokers
14 | //  private val targetTopic = topic
15 | //  private val props = new Properties()
16 | //  props.put("metadata.broker.list", brokerList)
17 | //  props.put("producer.type", "async")
18 | //  private val config = new ProducerConfig(props)
19 | //  private val producer = new Producer[String, String](config)
20 | //
21 | //  private val  PAGE_NUM =100
22 | //  private val MAX_MSG_NUM= 3
23 | //  private val MAX_CLICK_TIME = 5
24 | //  private val MAX_STAY_TIME = 10
25 | //  private val LIKE_OR_NOT = Array[Int](1, 0, -1)
26 | //
27 | //
28 | //  override def run(): Unit = {
29 | //    val rand = new Random()
30 | //    while (true) {
31 | //      val msgNum = rand.nextInt(MAX_MSG_NUM) + 1
32 | //      for (i <- msgNum) {
33 | //        val msg = new StringBuffer()
34 | //        msg.append("page" + (rand.nextInt(PAGE_NUM) + 1))
35 | //        msg.append("|")
36 | //        msg.append(rand.nextInt(MAX_CLICK_TIME) + 1)
37 | //        msg.append("|")
38 | //        msg.append(rand.nextInt(MAX_CLICK_TIME) + rand.nextFloat())
39 | //        msg.append("|")
40 | //        msg.append(LIKE_OR_NOT(rand.nextInt(3)))
41 | //        println(msg.toString)
42 | //        sendMessage(msg.toString)
43 | //      }
44 | //      println("%d user behavior message producer.".format(msgNum + 1))
45 | //    }
46 | //  }
47 | //
48 | //  def sendMessage(message: String) = {
49 | //    try{
50 | //      val data = new KeyedMessage[String, String](topic, message)
51 | //      producer.send(data)
52 | //    }catch {
53 | //      case e: Exception => println(e)
54 | //    }
55 | //  }
56 | //
57 | //  object UserBehaviorMsgProducerClient{
58 | //    def main(args: Array[String]) {
59 | //      if (args.length < 2 ){
60 | //        println("Usage: UserBehaviorMsgProducerClient ip:9092 user-behavior-topic")
61 | //        System.exit(1)
62 | //      }
63 | //      new Thread(new UserBehaviorMsgProducer(args(0), args(1))).start()
64 | //    }
65 | //  }
66 | //}
67 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/QueueStream.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.rdd.RDD
 5 | import org.apache.spark.streaming.{Seconds, StreamingContext}
 6 | 
 7 | import scala.collection.mutable
 8 | 
 9 | /**
10 |   * Created by hadoop on 4/2/16.
11 |   */
12 | object QueueStream {
13 | 
14 |   def main(args: Array[String]) {
15 |     val conf = new SparkConf().setMaster("local[2]").setAppName("queueStream")
16 |     val ssc = new StreamingContext(conf,Seconds(1))
17 | 
18 |     val rddQueue = new mutable.SynchronizedQueue[RDD[Int]]()
19 | 
20 |     val inputStream = ssc.queueStream(rddQueue)
21 | 
22 |     val mappedStream = inputStream.map(x => (x % 10,1))
23 |     val reduceStream = mappedStream.reduceByKey(_ + _)
24 |     reduceStream.print
25 |     ssc.start()
26 |     for(i <- 1 to 30){
27 |       rddQueue += ssc.sparkContext.makeRDD(1 to 100, 2)
28 |       Thread.sleep(1000)
29 |     }
30 | 
31 |     ssc.stop()
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/ScoketStreaming.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.streaming.{Seconds, StreamingContext}
 5 | 
 6 | /**
 7 |   * Created by hadoop on 3/28/16.
 8 |   */
 9 | object ScoketStreaming {
10 | 
11 |   def main(args: Array[String]){
12 |     val conf = new SparkConf().setMaster("local[2]").setAppName("ScoketStreaming")
13 |     val sc = new StreamingContext(conf,Seconds(10))
14 | 
15 |     val lines = sc.socketTextStream("master",9998)
16 |     val words = lines.flatMap(_.split((" ")))
17 |     val wordCounts = words.map(x => (x , 1)).reduceByKey(_ + _)
18 |     wordCounts.print()
19 |     sc.start()
20 |     sc.awaitTermination()
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/ScoketStreamingCheckPoint.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.streaming.{Seconds, StreamingContext}
 5 | 
 6 | /**
 7 |   * Created by Mobin on 2017/8/25.
 8 |   */
 9 | object ScoketStreamingCheckPoint {
10 |   val conf = new SparkConf().setMaster("local[*]").setAppName("checkPoint")
11 |   val checkPointPath = "."
12 | 
13 |   def checkPointFun(): StreamingContext = {
14 |        val sc = new StreamingContext(conf, Seconds(5))
15 |        val lines = sc.socketTextStream("localhost",9998)
16 |        sc.checkpoint(checkPointPath)
17 |        val words = lines.flatMap(_.split((" ")))
18 |        val wordCounts = words.map(x => (x , 1)).reduceByKey(_ + _)
19 |        wordCounts.print()
20 |        sc
21 |   }
22 | 
23 |   def main(args: Array[String]) {
24 |     val context = StreamingContext.getOrCreate(checkPointPath, checkPointFun)
25 |     context.start()
26 |     context.awaitTermination()
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/StateFull.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.streaming.StreamingContext._
 5 | import org.apache.spark.streaming.{Seconds, StreamingContext}
 6 | 
 7 | /**
 8 |   * Created by hadoop on 3/31/16.
 9 |   */
10 | object StateFull {
11 | 
12 |   def main(args: Array[String]) {
13 |     val updateFunc = (values: Seq[Int], state: Option[Int]) => {
14 |       val currentCount = values.foldLeft(0)(_ + _)
15 |       val previousCount = state.getOrElse(0)
16 |       Some(currentCount + previousCount)
17 |     }
18 | 
19 |     val conf = new SparkConf().setMaster("local[2]").setAppName("stateFull")
20 |     val sc = new StreamingContext(conf, Seconds(10))
21 |     sc.checkpoint(".")
22 | 
23 |     val lines = sc.socketTextStream("master", 9998)
24 |     val words = lines.flatMap(_.split(" "))
25 |     val wordDstream = words.map(x => (x, 1))
26 | 
27 |     val stateDstream = wordDstream.updateStateByKey[Int](updateFunc)
28 |     stateDstream.print()
29 |     sc.start()
30 |     sc.awaitTermination()
31 |   }
32 | }
33 | 


--------------------------------------------------------------------------------
/src/main/scala/com/mobin/sparkStreaming/WindowWordCount.scala:
--------------------------------------------------------------------------------
 1 | package com.mobin.sparkStreaming
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.streaming.{Seconds, StreamingContext}
 5 | 
 6 | /**
 7 |   * Created by hadoop on 4/2/16.
 8 |   */
 9 | object WindowWordCount {
10 | 
11 |   def main(args: Array[String]) {
12 |     val conf = new SparkConf().setMaster("local[2]").setAppName("windowWordCunt")
13 |     val ssc = new StreamingContext(conf,Seconds(5))
14 |     ssc.checkpoint(".")
15 |     val lines = ssc.socketTextStream("MOBIN",9998)
16 |     val words = lines.flatMap(_.split(" "))
17 |     val wordCounts = words.map(x => (x , 1)).reduceByKeyAndWindow(_+_,_+_,Seconds(60),Seconds(10))
18 |     wordCounts.print
19 |     ssc.start()
20 |     ssc.awaitTermination()
21 |   }
22 | }
23 | 


--------------------------------------------------------------------------------