├── .gitignore ├── .idea ├── .name ├── compiler.xml ├── copyright │ └── profiles_settings.xml ├── encodings.xml ├── misc.xml ├── modules.xml ├── modules │ ├── NaturalLanguageProces.iml │ ├── calssification-build.iml │ ├── calssification.iml │ ├── classification-build.iml │ └── naturallanguageprocessing-build.iml ├── sbt.xml ├── scala_compiler.xml ├── uiDesigner.xml ├── vcs.xml └── workspace.xml ├── README.md ├── build.sbt ├── project ├── build.properties ├── plugins.sbt └── target │ ├── resolution-cache │ ├── default │ │ ├── calssification-build │ │ │ └── scala_2.10 │ │ │ │ └── sbt_0.13 │ │ │ │ └── 0.1-SNAPSHOT │ │ │ │ ├── resolved.xml.properties │ │ │ │ └── resolved.xml.xml │ │ ├── classification-build │ │ │ └── scala_2.10 │ │ │ │ └── sbt_0.13 │ │ │ │ └── 0.1-SNAPSHOT │ │ │ │ ├── resolved.xml.properties │ │ │ │ └── resolved.xml.xml │ │ └── naturallanguageprocessing-build │ │ │ └── scala_2.10 │ │ │ └── sbt_0.13 │ │ │ └── 0.1-SNAPSHOT │ │ │ ├── resolved.xml.properties │ │ │ └── resolved.xml.xml │ └── reports │ │ ├── default-calssification-build-compile-internal.xml │ │ ├── default-calssification-build-compile.xml │ │ ├── default-calssification-build-docs.xml │ │ ├── default-calssification-build-optional.xml │ │ ├── default-calssification-build-plugin.xml │ │ ├── default-calssification-build-pom.xml │ │ ├── default-calssification-build-provided.xml │ │ ├── default-calssification-build-runtime-internal.xml │ │ ├── default-calssification-build-runtime.xml │ │ ├── default-calssification-build-scala-tool.xml │ │ ├── default-calssification-build-sources.xml │ │ ├── default-calssification-build-test-internal.xml │ │ ├── default-calssification-build-test.xml │ │ ├── default-classification-build-compile-internal.xml │ │ ├── default-classification-build-compile.xml │ │ ├── default-classification-build-docs.xml │ │ ├── default-classification-build-optional.xml │ │ ├── default-classification-build-plugin.xml │ │ ├── default-classification-build-pom.xml │ │ ├── default-classification-build-provided.xml │ │ ├── default-classification-build-runtime-internal.xml │ │ ├── default-classification-build-runtime.xml │ │ ├── default-classification-build-scala-tool.xml │ │ ├── default-classification-build-sources.xml │ │ ├── default-classification-build-test-internal.xml │ │ ├── default-classification-build-test.xml │ │ ├── default-naturallanguageprocessing-build-compile-internal.xml │ │ ├── default-naturallanguageprocessing-build-compile.xml │ │ ├── default-naturallanguageprocessing-build-docs.xml │ │ ├── default-naturallanguageprocessing-build-optional.xml │ │ ├── default-naturallanguageprocessing-build-plugin.xml │ │ ├── default-naturallanguageprocessing-build-pom.xml │ │ ├── default-naturallanguageprocessing-build-provided.xml │ │ ├── default-naturallanguageprocessing-build-runtime-internal.xml │ │ ├── default-naturallanguageprocessing-build-runtime.xml │ │ ├── default-naturallanguageprocessing-build-scala-tool.xml │ │ ├── default-naturallanguageprocessing-build-sources.xml │ │ ├── default-naturallanguageprocessing-build-test-internal.xml │ │ ├── default-naturallanguageprocessing-build-test.xml │ │ ├── ivy-report.css │ │ └── ivy-report.xsl │ └── streams │ ├── $global │ ├── $global │ │ └── $global │ │ │ └── streams │ │ │ └── out │ ├── dependencyPositions │ │ └── $global │ │ │ └── streams │ │ │ └── update_cache_2.10 │ │ │ ├── input_dsp │ │ │ └── output_dsp │ ├── ivyConfiguration │ │ └── $global │ │ │ └── streams │ │ │ └── out │ ├── ivySbt │ │ └── $global │ │ │ └── streams │ │ │ └── out │ ├── projectDescriptors │ 
│ └── $global │ │ │ └── streams │ │ │ └── out │ └── update │ │ └── $global │ │ └── streams │ │ ├── out │ │ └── update_cache_2.10 │ │ ├── inputs │ │ └── output │ ├── compile │ ├── $global │ │ └── $global │ │ │ └── discoveredMainClasses │ │ │ └── data │ ├── compile │ │ └── $global │ │ │ └── streams │ │ │ └── out │ ├── compileIncremental │ │ └── $global │ │ │ └── streams │ │ │ ├── export │ │ │ └── out │ ├── copyResources │ │ └── $global │ │ │ └── streams │ │ │ ├── copy-resources │ │ │ └── out │ ├── dependencyClasspath │ │ └── $global │ │ │ └── streams │ │ │ └── export │ ├── exportedProducts │ │ └── $global │ │ │ └── streams │ │ │ └── export │ ├── externalDependencyClasspath │ │ └── $global │ │ │ └── streams │ │ │ └── export │ ├── internalDependencyClasspath │ │ └── $global │ │ │ └── streams │ │ │ └── export │ ├── managedClasspath │ │ └── $global │ │ │ └── streams │ │ │ └── export │ ├── unmanagedClasspath │ │ └── $global │ │ │ └── streams │ │ │ └── export │ └── unmanagedJars │ │ └── $global │ │ └── streams │ │ └── export │ └── runtime │ ├── dependencyClasspath │ └── $global │ │ └── streams │ │ └── export │ ├── exportedProducts │ └── $global │ │ └── streams │ │ └── export │ ├── externalDependencyClasspath │ └── $global │ │ └── streams │ │ └── export │ ├── fullClasspath │ └── $global │ │ └── streams │ │ └── export │ ├── internalDependencyClasspath │ └── $global │ │ └── streams │ │ └── export │ ├── managedClasspath │ └── $global │ │ └── streams │ │ └── export │ ├── unmanagedClasspath │ └── $global │ │ └── streams │ │ └── export │ └── unmanagedJars │ └── $global │ └── streams │ └── export ├── src ├── main │ └── scala │ │ ├── deeplearning │ │ ├── cae │ │ │ └── CAE.scala │ │ ├── cnn │ │ │ ├── CNN.scala │ │ │ └── CNNModel.scala │ │ └── tests │ │ │ └── Test_example_CNN.scala │ │ ├── intactprogram │ │ ├── telecomdataprocessing │ │ │ ├── TelecomDataProcess.scala │ │ │ └── util │ │ │ │ ├── HBaseUtil.scala │ │ │ │ └── LoggerUtil.scala │ │ ├── telecomdataprocessingAll │ │ │ ├── TDP.scala │ │ │ ├── TelecomDataProcess.scala │ │ │ ├── TelecomDataProcessing.scala │ │ │ ├── TelecomDataProcessingByHour.scala │ │ │ ├── readFromHdfs.scala │ │ │ └── util │ │ │ │ ├── HBaseUtil.scala │ │ │ │ ├── HDFSUtil.scala │ │ │ │ ├── LoggerUtil.scala │ │ │ │ └── TimeUtil.scala │ │ └── vipstockstatistic │ │ │ ├── CorpusBuild.scala │ │ │ ├── PredictWithDic.scala │ │ │ ├── VipStockStatistic.scala │ │ │ └── util │ │ │ ├── AnsjAnalyzer.scala │ │ │ ├── HBaseUtil.scala │ │ │ ├── LoggerUtil.scala │ │ │ ├── RedisUtil.scala │ │ │ └── config.xml │ │ ├── meachinelearning │ │ ├── Recommendation │ │ │ └── SparkMLlibColbFilter.scala │ │ ├── classification │ │ │ ├── BinaryClassification.scala │ │ │ ├── BinaryClassificationParaOptimization.scala │ │ │ ├── BinaryClassificationRDDWithPCA.scala │ │ │ ├── BinaryClassificationWithALS.scala │ │ │ ├── BinaryClassificationWithPCA.scala │ │ │ ├── GaussianKernelSVM.scala │ │ │ ├── PCAtest.scala │ │ │ └── TrainingProcessWithPCA.scala │ │ ├── correlationanalysis │ │ │ └── correlationAnalysis.scala │ │ ├── data │ │ │ └── SupportVectorMachineWithGaussianKernel.txt │ │ ├── hotdegreecalculate │ │ │ ├── CommunityFrequencyStatistics.scala │ │ │ ├── HotDegreeCalculate.scala │ │ │ ├── HotDegreeCalculation.scala │ │ │ ├── HotDegreeCalculationRDD.scala │ │ │ └── fileIO.scala │ │ ├── textrank │ │ │ ├── AbstractExtract.scala │ │ │ ├── ConstructTextGraph.scala │ │ │ ├── KeywordExtractor.scala │ │ │ ├── PropertyExtractor.scala │ │ │ └── TextRank.scala │ │ ├── topicmodel │ │ │ ├── LDAModel.scala │ │ │ ├── LDATest.scala │ 
│ │ └── LatentDirichletAllocationExample.scala │ │ └── word2vec │ │ │ ├── ClassifyModel.scala │ │ │ ├── ClassifyPredict.scala │ │ │ ├── DataPrepare.scala │ │ │ ├── DeleteDirectory.scala │ │ │ ├── Word2Vec.scala │ │ │ ├── model │ │ │ ├── data │ │ │ │ ├── .part-r-00000-e1c254b3-21ba-4759-b7eb-b69f39950551.gz.parquet.crc │ │ │ │ ├── _SUCCESS │ │ │ │ ├── _common_metadata │ │ │ │ ├── _metadata │ │ │ │ └── part-r-00000-e1c254b3-21ba-4759-b7eb-b69f39950551.gz.parquet │ │ │ └── metadata │ │ │ │ ├── .part-00000.crc │ │ │ │ ├── _SUCCESS │ │ │ │ └── part-00000 │ │ │ ├── readme.md │ │ │ ├── textVectors.scala │ │ │ └── twc │ │ │ ├── W2VJsonConf.json │ │ │ ├── processing.scala │ │ │ └── training.scala │ │ ├── test │ │ └── regularExpression.scala │ │ ├── util │ │ ├── DataTransform.scala │ │ ├── DirectoryUtil.scala │ │ ├── FileUtil.scala │ │ ├── HBaseUtil.scala │ │ ├── HDFSUtil.scala │ │ ├── JsonUtil.scala │ │ ├── LoggerUtil.scala │ │ ├── MySQLUtil.scala │ │ ├── RedisUtil.scala │ │ ├── TextProcessing.scala │ │ ├── TimeUtil.scala │ │ ├── UrlCategoryTrim.scala │ │ ├── XMLUtil.scala │ │ └── regularExpression.scala │ │ └── wordSegmentation │ │ ├── AnsjAnalyzer.scala │ │ └── wordSegmentAnalyser.scala └── test │ ├── resources │ ├── 2016-07-11-15.txt │ ├── 2016-07-12-13.txt │ ├── 2016-07-12-15.txt │ ├── 2016-07-12-16.txt │ └── text │ │ ├── 1.txt │ │ ├── 2.txt │ │ └── abstract │ └── scala │ ├── CNNTest.scala │ ├── ClassificationTest.scala │ ├── HDFSUtilTest.scala │ ├── HotWordsTest.scala │ ├── JSONUtilTest.scala │ ├── MySQLUtilTest.scala │ ├── Test.scala │ ├── TextRankTest.scala │ ├── classification.scala │ ├── keywordExtractorTest.scala │ ├── telecomDataProcessingTest.scala │ ├── testRankTest.scala │ ├── timeutilTest.scala │ └── word2vecTest.scala └── target ├── .history ├── resolution-cache ├── default │ ├── classification$sbt_2.10 │ │ └── 1.0 │ │ │ ├── resolved.xml.properties │ │ │ └── resolved.xml.xml │ ├── classification$sources_2.10 │ │ └── 1.0 │ │ │ ├── resolved.xml.properties │ │ │ └── resolved.xml.xml │ ├── classification_2.10 │ │ └── 1.0 │ │ │ ├── resolved.xml.properties │ │ │ └── resolved.xml.xml │ ├── naturallanguageprocessing$sbt_2.10 │ │ └── 1.0 │ │ │ ├── resolved.xml.properties │ │ │ └── resolved.xml.xml │ └── naturallanguageprocessing$sources_2.10 │ │ └── 1.0 │ │ ├── resolved.xml.properties │ │ └── resolved.xml.xml ├── meachinelearning-classification │ ├── meachinelearning-classification$sbt_2.10 │ │ └── 1.0 │ │ │ ├── resolved.xml.properties │ │ │ └── resolved.xml.xml │ ├── meachinelearning-classification$sources_2.10 │ │ └── 1.0 │ │ │ ├── resolved.xml.properties │ │ │ └── resolved.xml.xml │ └── meachinelearning-classification_2.10 │ │ └── 1.0 │ │ ├── resolved.xml.properties │ │ └── resolved.xml.xml └── reports │ ├── default-classification$sbt_2.10-default.xml │ ├── default-classification$sources_2.10-compile-internal.xml │ ├── default-classification$sources_2.10-compile.xml │ ├── default-classification$sources_2.10-docs.xml │ ├── default-classification$sources_2.10-optional.xml │ ├── default-classification$sources_2.10-plugin.xml │ ├── default-classification$sources_2.10-pom.xml │ ├── default-classification$sources_2.10-provided.xml │ ├── default-classification$sources_2.10-runtime-internal.xml │ ├── default-classification$sources_2.10-runtime.xml │ ├── default-classification$sources_2.10-scala-tool.xml │ ├── default-classification$sources_2.10-sources.xml │ ├── default-classification$sources_2.10-test-internal.xml │ ├── default-classification$sources_2.10-test.xml │ ├── 
default-classification_2.10-compile-internal.xml │ ├── default-classification_2.10-compile.xml │ ├── default-classification_2.10-docs.xml │ ├── default-classification_2.10-optional.xml │ ├── default-classification_2.10-plugin.xml │ ├── default-classification_2.10-pom.xml │ ├── default-classification_2.10-provided.xml │ ├── default-classification_2.10-runtime-internal.xml │ ├── default-classification_2.10-runtime.xml │ ├── default-classification_2.10-scala-tool.xml │ ├── default-classification_2.10-sources.xml │ ├── default-classification_2.10-test-internal.xml │ ├── default-classification_2.10-test.xml │ ├── default-naturallanguageprocessing$sbt_2.10-default.xml │ ├── ivy-report.css │ ├── ivy-report.xsl │ ├── meachinelearning-classification-meachinelearning-classification$sbt_2.10-default.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-compile-internal.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-compile.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-docs.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-optional.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-plugin.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-pom.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-provided.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-runtime-internal.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-runtime.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-scala-tool.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-sources.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-test-internal.xml │ ├── meachinelearning-classification-meachinelearning-classification$sources_2.10-test.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-compile-internal.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-compile.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-docs.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-optional.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-plugin.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-pom.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-provided.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-runtime-internal.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-runtime.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-scala-tool.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-sources.xml │ ├── meachinelearning-classification-meachinelearning-classification_2.10-test-internal.xml │ └── meachinelearning-classification-meachinelearning-classification_2.10-test.xml ├── scala-2.10 └── test-classes │ └── text │ ├── 1.txt │ ├── 2.txt │ └── abstract └── streams ├── $global ├── $global │ └── dumpStructure │ │ └── $global │ │ └── streams │ │ └── out ├── clean │ └── $global │ │ └── streams │ │ └── out ├── dependencyPositions │ └── $global │ │ └── streams │ │ └── update_cache_2.10 │ │ ├── input_dsp │ │ └── 
output_dsp ├── ivyConfiguration │ └── $global │ │ └── streams │ │ └── out ├── ivySbt │ └── $global │ │ └── streams │ │ └── out ├── projectDescriptors │ └── $global │ │ └── streams │ │ └── out ├── update │ └── $global │ │ └── streams │ │ ├── out │ │ └── update_cache_2.10 │ │ ├── inputs │ │ └── output ├── updateClassifiers │ └── $global │ │ └── streams │ │ └── out └── updateSbtClassifiers │ └── $global │ └── streams │ └── out ├── compile ├── externalDependencyClasspath │ └── $global │ │ └── streams │ │ └── export ├── managedClasspath │ └── $global │ │ └── streams │ │ └── export ├── unmanagedClasspath │ └── $global │ │ └── streams │ │ └── export └── unmanagedJars │ └── $global │ └── streams │ └── export ├── runtime ├── externalDependencyClasspath │ └── $global │ │ └── streams │ │ └── export ├── managedClasspath │ └── $global │ │ └── streams │ │ └── export ├── unmanagedClasspath │ └── $global │ │ └── streams │ │ └── export └── unmanagedJars │ └── $global │ └── streams │ └── export └── test ├── externalDependencyClasspath └── $global │ └── streams │ └── export ├── managedClasspath └── $global │ └── streams │ └── export ├── unmanagedClasspath └── $global │ └── streams │ └── export └── unmanagedJars └── $global └── streams └── export /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | calssification -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /.idea/modules/NaturalLanguageProces.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /.idea/modules/calssification-build.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/modules/calssification.iml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/sbt.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 26 | 27 | -------------------------------------------------------------------------------- /.idea/scala_compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Natural Language Processing 2 | ## Introduction 3 | 4 | This is a Natural Language Processing package; it includes machine learning utilities and basic NLP utilities. 5 | 6 | ## Machine Learning 7 | 8 | Natural language processing using machine learning algorithms. 9 | 10 | ### TextClassification 11 | 12 | Text classification using naive Bayes, SVMWithSGD, and a Gaussian-kernel SVM. 13 | 14 | #### Bayesian 15 | 16 | #### SVMWithSGD 17 | 18 | #### GaussianKernelSVM 19 | 20 | ### CorrelationAnalysis 21 | 22 | ### HotDegreeCalculate 23 | 24 | Computes the hot degree of keywords using the Bayesian average and Newton's law of cooling. 25 | 26 | ### TextRank 27 | 28 | Based on PageRank. 29 | 30 | ### TopicModel 31 | 32 | Latent Dirichlet Allocation (LDA). 33 | 34 | ## Util 35 | 36 | Preprocessing tools. 37 | 38 | shipment of gold damaged in a fire, shipment of gold damaged in a fire, 39 | delivery of silver arrived in a silver truck 40 | shipment of gold arrived in a truck -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | name := "NaturalLanguageProcessing" 2 | 3 | version := "1.0" 4 | 5 | scalaVersion := "2.10.4" 6 | 7 | // Kunyan word-segmentation (tokenizer) API 8 | resolvers += "Kunyan Repo" at "http://61.147.114.67:8081/nexus/content/groups/public/" 9 | 10 | libraryDependencies += "com.kunyan" % "nlpsuit-package" % "0.2.8.3" 11 | 12 | libraryDependencies += "org.scalactic" %% "scalactic" % "2.2.5" % "test" 13 | 14 | libraryDependencies += "org.scalatest" %% "scalatest" % "2.2.5" % "test" 15 | 16 | libraryDependencies += "org.scala-lang" % "scala-compiler" % "2.10.4" 17 | 18 | libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "2.7.1" excludeAll ExclusionRule(organization = "javax.servlet") 19 | 20 | libraryDependencies += "org.apache.hadoop" % "hadoop-hdfs" % "2.7.1" % "provided" 21 | 22 | libraryDependencies += "org.apache.spark" % "spark-core_2.10" % "1.5.2" 23 | 24 | libraryDependencies += "org.apache.spark" % "spark-mllib_2.10" % "1.5.2" 25 | 26 | libraryDependencies += "mysql" % "mysql-connector-java" % "3.1.14" 27 | 28 | libraryDependencies += "org.graphstream" % "gs-core" % "1.1.2" 29 | 30 | libraryDependencies += "org.apache.spark" % "spark-graphx_2.10" % "1.5.2" 31 | 32 | libraryDependencies += "com.ibm.icu" % "icu4j" % "56.1" 33 | 34 | libraryDependencies += "org.apache.hbase" % "hbase" % "0.98.2-hadoop2" 35 | 36 | libraryDependencies += "org.apache.hbase" % "hbase-client" % "1.1.2" 37 | 38 | libraryDependencies += "org.apache.hbase" % "hbase-common" % "1.1.2" 39 | 40 | libraryDependencies += "org.apache.hbase" % "hbase-server" % "1.1.2" 41 | 42 |
//libraryDependencies += "org.scalanlp" % "breeze_2.10" % "0.11.2" 43 | 44 | libraryDependencies += "org.scalanlp" % "breeze-math_2.10" % "0.4" intransitive() 45 | 46 | //libraryDependencies += "org.scalanlp" % "breeze-learn_2.9.2" % "0.2" intransitive() 47 | 48 | libraryDependencies += "org.scalanlp" % "breeze-process_2.10" % "0.3" intransitive() 49 | 50 | libraryDependencies += "org.scalanlp" % "breeze-viz_2.10" % "0.12" exclude("org.scalanlp", "breeze_2.10") 51 | 52 | libraryDependencies += "org.scalanlp" % "nak_2.10" % "1.3" 53 | 54 | libraryDependencies += "redis.clients" % "jedis" % "2.8.0" 55 | 56 | libraryDependencies += "org.ansj" % "ansj_seg" % "5.0.2" 57 | 58 | libraryDependencies += "org.json" % "json" % "20160212" 59 | 60 | libraryDependencies += "org.nlpcn" % "nlp-lang" % "1.7" 61 | 62 | assemblyMergeStrategy in assembly := { 63 | case PathList("javax", "servlet", xs @ _*) => MergeStrategy.last 64 | case PathList("javax", "activation", xs @ _*) => MergeStrategy.last 65 | case PathList("javax", "el", xs @ _*) => MergeStrategy.last 66 | case PathList("org", "apache", xs @ _*) => MergeStrategy.last 67 | case PathList("com", "google", xs @ _*) => MergeStrategy.last 68 | case PathList("com", "esotericsoftware", xs @ _*) => MergeStrategy.last 69 | case PathList("com", "codahale", xs @ _*) => MergeStrategy.last 70 | case PathList("com", "yammer", xs @ _*) => MergeStrategy.last 71 | case "about.html" => MergeStrategy.rename 72 | case "META-INF/ECLIPSEF.RSA" => MergeStrategy.last 73 | case "META-INF/mailcap" => MergeStrategy.last 74 | case "META-INF/mimetypes.default" => MergeStrategy.last 75 | case "plugin.properties" => MergeStrategy.last 76 | case "log4j.properties" => MergeStrategy.last 77 | case x => 78 | val oldStrategy = (assemblyMergeStrategy in assembly).value 79 | oldStrategy(x) 80 | } 81 | 82 | 83 | test in assembly := {} 84 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version = 0.13.8 -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | logLevel := Level.Warn 2 | 3 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.1") -------------------------------------------------------------------------------- /project/target/resolution-cache/default/calssification-build/scala_2.10/sbt_0.13/0.1-SNAPSHOT/resolved.xml.properties: -------------------------------------------------------------------------------- 1 | #default#calssification-build;0.1-SNAPSHOT resolved revisions 2 | #Wed Mar 30 14:23:46 CST 2016 3 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 ? 
2.10.4 null 4 | +e\:scalaVersion\:\#@\#\:+2.10\:\#@\#\:+revision\:\#@\#\:+0.14.1\:\#@\#\:+module\:\#@\#\:+sbt-assembly\:\#@\#\:+e\:sbtVersion\:\#@\#\:+0.13\:\#@\#\:+organisation\:\#@\#\:+com.eed3si9n\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.14.1 release 0.14.1 null 5 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-compiler\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 release 2.10.4 null 6 | +revision\:\#@\#\:+0.13.8\:\#@\#\:+module\:\#@\#\:+sbt\:\#@\#\:+organisation\:\#@\#\:+org.scala-sbt\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.13.8 release 0.13.8 null 7 | -------------------------------------------------------------------------------- /project/target/resolution-cache/default/calssification-build/scala_2.10/sbt_0.13/0.1-SNAPSHOT/resolved.xml.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 10 | 11 | calssification-build 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /project/target/resolution-cache/default/classification-build/scala_2.10/sbt_0.13/0.1-SNAPSHOT/resolved.xml.properties: -------------------------------------------------------------------------------- 1 | #default#classification-build;0.1-SNAPSHOT resolved revisions 2 | #Tue Apr 12 10:12:42 CST 2016 3 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 ? 2.10.4 null 4 | +e\:scalaVersion\:\#@\#\:+2.10\:\#@\#\:+revision\:\#@\#\:+0.14.1\:\#@\#\:+module\:\#@\#\:+sbt-assembly\:\#@\#\:+e\:sbtVersion\:\#@\#\:+0.13\:\#@\#\:+organisation\:\#@\#\:+com.eed3si9n\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.14.1 release 0.14.1 null 5 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-compiler\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 release 2.10.4 null 6 | +revision\:\#@\#\:+0.13.8\:\#@\#\:+module\:\#@\#\:+sbt\:\#@\#\:+organisation\:\#@\#\:+org.scala-sbt\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.13.8 release 0.13.8 null 7 | -------------------------------------------------------------------------------- /project/target/resolution-cache/default/classification-build/scala_2.10/sbt_0.13/0.1-SNAPSHOT/resolved.xml.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 10 | 11 | classification-build 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /project/target/resolution-cache/default/naturallanguageprocessing-build/scala_2.10/sbt_0.13/0.1-SNAPSHOT/resolved.xml.properties: -------------------------------------------------------------------------------- 1 | #default#naturallanguageprocessing-build;0.1-SNAPSHOT resolved revisions 2 | #Wed Oct 12 10:38:54 CST 2016 3 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 ? 
2.10.4 null 4 | +e\:scalaVersion\:\#@\#\:+2.10\:\#@\#\:+revision\:\#@\#\:+0.14.1\:\#@\#\:+module\:\#@\#\:+sbt-assembly\:\#@\#\:+e\:sbtVersion\:\#@\#\:+0.13\:\#@\#\:+organisation\:\#@\#\:+com.eed3si9n\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.14.1 release 0.14.1 null 5 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-compiler\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 release 2.10.4 null 6 | +revision\:\#@\#\:+0.13.8\:\#@\#\:+module\:\#@\#\:+sbt\:\#@\#\:+organisation\:\#@\#\:+org.scala-sbt\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.13.8 release 0.13.8 null 7 | -------------------------------------------------------------------------------- /project/target/resolution-cache/default/naturallanguageprocessing-build/scala_2.10/sbt_0.13/0.1-SNAPSHOT/resolved.xml.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 10 | 11 | naturallanguageprocessing-build 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-calssification-build-docs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-calssification-build-optional.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-calssification-build-plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-calssification-build-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-calssification-build-sources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-classification-build-docs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-classification-build-optional.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-classification-build-plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- 
/project/target/resolution-cache/reports/default-classification-build-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-classification-build-sources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-naturallanguageprocessing-build-docs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-naturallanguageprocessing-build-optional.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-naturallanguageprocessing-build-plugin.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-naturallanguageprocessing-build-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/resolution-cache/reports/default-naturallanguageprocessing-build-sources.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /project/target/streams/$global/$global/$global/streams/out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/$global/$global/$global/streams/out -------------------------------------------------------------------------------- /project/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/input_dsp: -------------------------------------------------------------------------------- 1 | org.scala-lang scala-library2.10.4provided com.eed3si9n sbt-assembly0.14.1 e:sbtVersion0.13e:scalaVersion2.10 -------------------------------------------------------------------------------- /project/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/output_dsp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/output_dsp -------------------------------------------------------------------------------- /project/target/streams/$global/ivyConfiguration/$global/streams/out: -------------------------------------------------------------------------------- 1 | [debug] Other repositories: 2 | [debug] Default repositories: 3 | [debug] Using inline dependencies specified in Scala. 
4 | -------------------------------------------------------------------------------- /project/target/streams/$global/ivySbt/$global/streams/out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/$global/ivySbt/$global/streams/out -------------------------------------------------------------------------------- /project/target/streams/$global/projectDescriptors/$global/streams/out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/$global/projectDescriptors/$global/streams/out -------------------------------------------------------------------------------- /project/target/streams/$global/update/$global/streams/update_cache_2.10/inputs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/$global/update/$global/streams/update_cache_2.10/inputs -------------------------------------------------------------------------------- /project/target/streams/$global/update/$global/streams/update_cache_2.10/output: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/$global/update/$global/streams/update_cache_2.10/output -------------------------------------------------------------------------------- /project/target/streams/compile/$global/$global/discoveredMainClasses/data: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /project/target/streams/compile/compile/$global/streams/out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/compile/compile/$global/streams/out -------------------------------------------------------------------------------- /project/target/streams/compile/compileIncremental/$global/streams/export: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/compile/compileIncremental/$global/streams/export -------------------------------------------------------------------------------- /project/target/streams/compile/compileIncremental/$global/streams/out: -------------------------------------------------------------------------------- 1 | [debug] 2 | [debug] Initial source changes: 3 | [debug] removed:Set() 4 | [debug] added: Set() 5 | [debug] modified: Set() 6 | [debug] Removed products: Set() 7 | [debug] External API changes: API Changes: Set() 8 | [debug] Modified binary dependencies: Set() 9 | [debug] Initial directly invalidated sources: Set() 10 | [debug] 11 | [debug] Sources indirectly invalidated by: 12 | [debug] product: Set() 13 | [debug] binary dep: Set() 14 | [debug] external source: Set() 15 | [debug] All initially invalidated sources: Set() 16 | 
-------------------------------------------------------------------------------- /project/target/streams/compile/copyResources/$global/streams/copy-resources: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/project/target/streams/compile/copyResources/$global/streams/copy-resources -------------------------------------------------------------------------------- /project/target/streams/compile/copyResources/$global/streams/out: -------------------------------------------------------------------------------- 1 | [debug] Copy resource mappings: 2 | [debug] 3 | -------------------------------------------------------------------------------- /project/target/streams/compile/exportedProducts/$global/streams/export: -------------------------------------------------------------------------------- 1 | /Users/li/workshop/NaturalLanguageProcessing/project/target/scala-2.10/sbt-0.13/classes 2 | -------------------------------------------------------------------------------- /project/target/streams/compile/internalDependencyClasspath/$global/streams/export: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /project/target/streams/compile/unmanagedClasspath/$global/streams/export: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /project/target/streams/compile/unmanagedJars/$global/streams/export: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /project/target/streams/runtime/dependencyClasspath/$global/streams/export: -------------------------------------------------------------------------------- 1 | 
/Users/li/workshop/NaturalLanguageProcessing/project/target/scala-2.10/sbt-0.13/classes:/Users/li/.ivy2/cache/scala_2.10/sbt_0.13/com.eed3si9n/sbt-assembly/jars/sbt-assembly-0.14.1.jar:/Users/li/.ivy2/cache/org.scalactic/scalactic_2.10/bundles/scalactic_2.10-2.2.1.jar:/Users/li/.sbt/boot/scala-2.10.4/lib/scala-library.jar:/Users/li/.sbt/boot/scala-2.10.4/lib/scala-reflect.jar:/Users/li/.ivy2/cache/org.pantsbuild/jarjar/jars/jarjar-1.6.0.jar:/Users/li/.ivy2/cache/org.apache.ant/ant/jars/ant-1.9.6.jar:/Users/li/.ivy2/cache/org.apache.ant/ant-launcher/jars/ant-launcher-1.9.6.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm/jars/asm-5.0.4.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm-commons/jars/asm-commons-5.0.4.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm-tree/jars/asm-tree-5.0.4.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-plugin-api/jars/maven-plugin-api-3.3.3.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-model/jars/maven-model-3.3.3.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-utils/jars/plexus-utils-3.0.20.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-artifact/jars/maven-artifact-3.3.3.jar:/Users/li/.ivy2/cache/org.eclipse.sisu/org.eclipse.sisu.plexus/eclipse-plugins/org.eclipse.sisu.plexus-0.3.0.jar:/Users/li/.ivy2/cache/javax.enterprise/cdi-api/jars/cdi-api-1.0.jar:/Users/li/.ivy2/cache/javax.annotation/jsr250-api/jars/jsr250-api-1.0.jar:/Users/li/.ivy2/cache/javax.inject/javax.inject/jars/javax.inject-1.jar:/Users/li/.ivy2/cache/org.eclipse.sisu/org.eclipse.sisu.inject/eclipse-plugins/org.eclipse.sisu.inject-0.3.0.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-component-annotations/jars/plexus-component-annotations-1.5.5.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-classworlds/bundles/plexus-classworlds-2.5.2.jar 2 | -------------------------------------------------------------------------------- /project/target/streams/runtime/exportedProducts/$global/streams/export: -------------------------------------------------------------------------------- 1 | /Users/li/workshop/NaturalLanguageProcessing/project/target/scala-2.10/sbt-0.13/classes 2 | -------------------------------------------------------------------------------- /project/target/streams/runtime/externalDependencyClasspath/$global/streams/export: -------------------------------------------------------------------------------- 1 | 
/Users/li/.ivy2/cache/scala_2.10/sbt_0.13/com.eed3si9n/sbt-assembly/jars/sbt-assembly-0.14.1.jar:/Users/li/.ivy2/cache/org.scalactic/scalactic_2.10/bundles/scalactic_2.10-2.2.1.jar:/Users/li/.sbt/boot/scala-2.10.4/lib/scala-library.jar:/Users/li/.sbt/boot/scala-2.10.4/lib/scala-reflect.jar:/Users/li/.ivy2/cache/org.pantsbuild/jarjar/jars/jarjar-1.6.0.jar:/Users/li/.ivy2/cache/org.apache.ant/ant/jars/ant-1.9.6.jar:/Users/li/.ivy2/cache/org.apache.ant/ant-launcher/jars/ant-launcher-1.9.6.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm/jars/asm-5.0.4.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm-commons/jars/asm-commons-5.0.4.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm-tree/jars/asm-tree-5.0.4.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-plugin-api/jars/maven-plugin-api-3.3.3.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-model/jars/maven-model-3.3.3.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-utils/jars/plexus-utils-3.0.20.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-artifact/jars/maven-artifact-3.3.3.jar:/Users/li/.ivy2/cache/org.eclipse.sisu/org.eclipse.sisu.plexus/eclipse-plugins/org.eclipse.sisu.plexus-0.3.0.jar:/Users/li/.ivy2/cache/javax.enterprise/cdi-api/jars/cdi-api-1.0.jar:/Users/li/.ivy2/cache/javax.annotation/jsr250-api/jars/jsr250-api-1.0.jar:/Users/li/.ivy2/cache/javax.inject/javax.inject/jars/javax.inject-1.jar:/Users/li/.ivy2/cache/org.eclipse.sisu/org.eclipse.sisu.inject/eclipse-plugins/org.eclipse.sisu.inject-0.3.0.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-component-annotations/jars/plexus-component-annotations-1.5.5.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-classworlds/bundles/plexus-classworlds-2.5.2.jar 2 | -------------------------------------------------------------------------------- /project/target/streams/runtime/fullClasspath/$global/streams/export: -------------------------------------------------------------------------------- 1 | 
/Users/li/workshop/NaturalLanguageProcessing/project/target/scala-2.10/sbt-0.13/classes:/Users/li/.ivy2/cache/scala_2.10/sbt_0.13/com.eed3si9n/sbt-assembly/jars/sbt-assembly-0.14.1.jar:/Users/li/.ivy2/cache/org.scalactic/scalactic_2.10/bundles/scalactic_2.10-2.2.1.jar:/Users/li/.sbt/boot/scala-2.10.4/lib/scala-library.jar:/Users/li/.sbt/boot/scala-2.10.4/lib/scala-reflect.jar:/Users/li/.ivy2/cache/org.pantsbuild/jarjar/jars/jarjar-1.6.0.jar:/Users/li/.ivy2/cache/org.apache.ant/ant/jars/ant-1.9.6.jar:/Users/li/.ivy2/cache/org.apache.ant/ant-launcher/jars/ant-launcher-1.9.6.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm/jars/asm-5.0.4.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm-commons/jars/asm-commons-5.0.4.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm-tree/jars/asm-tree-5.0.4.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-plugin-api/jars/maven-plugin-api-3.3.3.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-model/jars/maven-model-3.3.3.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-utils/jars/plexus-utils-3.0.20.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-artifact/jars/maven-artifact-3.3.3.jar:/Users/li/.ivy2/cache/org.eclipse.sisu/org.eclipse.sisu.plexus/eclipse-plugins/org.eclipse.sisu.plexus-0.3.0.jar:/Users/li/.ivy2/cache/javax.enterprise/cdi-api/jars/cdi-api-1.0.jar:/Users/li/.ivy2/cache/javax.annotation/jsr250-api/jars/jsr250-api-1.0.jar:/Users/li/.ivy2/cache/javax.inject/javax.inject/jars/javax.inject-1.jar:/Users/li/.ivy2/cache/org.eclipse.sisu/org.eclipse.sisu.inject/eclipse-plugins/org.eclipse.sisu.inject-0.3.0.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-component-annotations/jars/plexus-component-annotations-1.5.5.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-classworlds/bundles/plexus-classworlds-2.5.2.jar 2 | -------------------------------------------------------------------------------- /project/target/streams/runtime/internalDependencyClasspath/$global/streams/export: -------------------------------------------------------------------------------- 1 | /Users/li/workshop/NaturalLanguageProcessing/project/target/scala-2.10/sbt-0.13/classes 2 | -------------------------------------------------------------------------------- /project/target/streams/runtime/managedClasspath/$global/streams/export: -------------------------------------------------------------------------------- 1 | 
/Users/li/.ivy2/cache/scala_2.10/sbt_0.13/com.eed3si9n/sbt-assembly/jars/sbt-assembly-0.14.1.jar:/Users/li/.ivy2/cache/org.scalactic/scalactic_2.10/bundles/scalactic_2.10-2.2.1.jar:/Users/li/.sbt/boot/scala-2.10.4/lib/scala-library.jar:/Users/li/.sbt/boot/scala-2.10.4/lib/scala-reflect.jar:/Users/li/.ivy2/cache/org.pantsbuild/jarjar/jars/jarjar-1.6.0.jar:/Users/li/.ivy2/cache/org.apache.ant/ant/jars/ant-1.9.6.jar:/Users/li/.ivy2/cache/org.apache.ant/ant-launcher/jars/ant-launcher-1.9.6.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm/jars/asm-5.0.4.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm-commons/jars/asm-commons-5.0.4.jar:/Users/li/.ivy2/cache/org.ow2.asm/asm-tree/jars/asm-tree-5.0.4.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-plugin-api/jars/maven-plugin-api-3.3.3.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-model/jars/maven-model-3.3.3.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-utils/jars/plexus-utils-3.0.20.jar:/Users/li/.ivy2/cache/org.apache.maven/maven-artifact/jars/maven-artifact-3.3.3.jar:/Users/li/.ivy2/cache/org.eclipse.sisu/org.eclipse.sisu.plexus/eclipse-plugins/org.eclipse.sisu.plexus-0.3.0.jar:/Users/li/.ivy2/cache/javax.enterprise/cdi-api/jars/cdi-api-1.0.jar:/Users/li/.ivy2/cache/javax.annotation/jsr250-api/jars/jsr250-api-1.0.jar:/Users/li/.ivy2/cache/javax.inject/javax.inject/jars/javax.inject-1.jar:/Users/li/.ivy2/cache/org.eclipse.sisu/org.eclipse.sisu.inject/eclipse-plugins/org.eclipse.sisu.inject-0.3.0.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-component-annotations/jars/plexus-component-annotations-1.5.5.jar:/Users/li/.ivy2/cache/org.codehaus.plexus/plexus-classworlds/bundles/plexus-classworlds-2.5.2.jar 2 | -------------------------------------------------------------------------------- /project/target/streams/runtime/unmanagedClasspath/$global/streams/export: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /project/target/streams/runtime/unmanagedJars/$global/streams/export: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/main/scala/deeplearning/cae/CAE.scala: -------------------------------------------------------------------------------- 1 | package deeplearning.cae 2 | 3 | /** 4 | * Created by li on 16/8/15. 
5 | */ 6 | object CAE { 7 | 8 | } 9 | -------------------------------------------------------------------------------- /src/main/scala/deeplearning/cnn/CNNModel.scala: -------------------------------------------------------------------------------- 1 | package deeplearning.cnn 2 | 3 | import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, DenseVector => BDV, Matrix => BM, SparseVector => BSV, Vector => BV} 4 | import org.apache.spark.rdd.RDD 5 | 6 | /** 7 | * label: target (label) matrix 8 | * features: feature matrix 9 | * predict_label: prediction matrix 10 | * error: error matrix 11 | */ 12 | case class PredictCNNLabel(label: BDM[Double], features: BDM[Double], predict_label: BDM[Double], error: BDM[Double]) extends Serializable 13 | 14 | class CNNModel( 15 | val cnn_layers: Array[CNNLayers], 16 | val cnn_ffW: BDM[Double], 17 | val cnn_ffb: BDM[Double]) extends Serializable { 18 | 19 | /** 20 | * Returns the prediction results. 21 | * Output format: (label, feature, predict_label, error) 22 | */ 23 | def predict(dataMatrix: RDD[(BDM[Double], BDM[Double])]): RDD[PredictCNNLabel] = { 24 | val sc = dataMatrix.sparkContext 25 | val bc_cnn_layers = sc.broadcast(cnn_layers) 26 | val bc_cnn_ffW = sc.broadcast(cnn_ffW) 27 | val bc_cnn_ffb = sc.broadcast(cnn_ffb) 28 | // CNNff performs the feed-forward (forward propagation) pass 29 | val train_cnnff = CNN.CNNff(dataMatrix, bc_cnn_layers, bc_cnn_ffb, bc_cnn_ffW) 30 | val rdd_predict = train_cnnff.map { f => 31 | val label = f._1 32 | val nna1 = f._2(0)(0) 33 | val nnan = f._4 34 | val error = f._4 - f._1 35 | PredictCNNLabel(label, nna1, nnan, error) 36 | } 37 | rdd_predict 38 | } 39 | 40 | /** 41 | * Computes the output error 42 | * (mean error). 43 | */ 44 | def Loss(predict: RDD[PredictCNNLabel]): Double = { 45 | val predict1 = predict.map(f => f.error) 46 | // error and loss 47 | // output error computation 48 | val loss1 = predict1 49 | val (loss2, counte) = loss1.treeAggregate((0.0, 0L))( 50 | seqOp = (c, v) => { 51 | // c: (e, count), v: (m) 52 | val e1 = c._1 53 | val e2 = (v :* v).sum 54 | val esum = e1 + e2 55 | (esum, c._2 + 1) 56 | }, 57 | combOp = (c1, c2) => { 58 | // c: (e, count) 59 | val e1 = c1._1 60 | val e2 = c2._1 61 | val esum = e1 + e2 62 | (esum, c1._2 + c2._2) 63 | }) 64 | val Loss = (loss2 / counte.toDouble) * 0.5 65 | Loss 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /src/main/scala/deeplearning/tests/Test_example_CNN.scala: -------------------------------------------------------------------------------- 1 | package tests 2 | 3 | import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, DenseVector => BDV, Matrix => BM, SparseVector => BSV, Vector => BV, axpy => brzAxpy, max => Bmax, min => Bmin, sum => Bsum, svd => brzSvd} 4 | import deeplearning.cnn.CNN 5 | import org.apache.log4j.{Level, Logger} 6 | import org.apache.spark.{SparkConf, SparkContext} 7 | 8 | object Test_example_CNN { 9 | 10 | def main(args: Array[String]) { 11 | //1 Create the Spark context 12 | val conf = new SparkConf().setAppName("CNNtest").setMaster("local") 13 | val sc = new SparkContext(conf) 14 | 15 | //2 Test data 16 | Logger.getRootLogger.setLevel(Level.WARN) 17 | val data_path = "/Users/li/workshop/DataSet/deeplearning/train_d3.txt" 18 | val examples = sc.textFile(data_path).cache() 19 | val train_d1 = examples.map { line => 20 | val f1 = line.split("\t") 21 | val f = f1.map(f => f.toDouble) 22 | val y = f.slice(0, 4) 23 | val x = f.slice(4, f.length) 24 | (new BDM(1, y.length, y), new BDM(1, x.length, x)) 25 | } 26 | 27 | val train_d = train_d1.map(f => (f._1, f._2)) 28 | 29 | 30 | //3 Set the training parameters and build the model 31 | // opts: iteration step size, number of iterations, cross-validation ratio 32 | val opts = Array(50.0, 1.0, 0.0) 33 |
train_d.cache 34 | val numExamples = train_d.count() 35 | println(s"numExamples = $numExamples.") 36 | 37 | val CNNmodel = new CNN() 38 | .setMapsize(new BDM(1, 2, Array(28.0, 28.0))) 39 | .setTypes(Array("i", "c", "s", "c", "s")) 40 | .setLayer(5) 41 | .setOnum(10) 42 | .setOutputmaps(Array(0.0, 6.0, 0.0, 12.0, 0.0)) 43 | .setKernelsize(Array(0.0, 5.0, 0.0, 5.0, 0.0)) 44 | .setScale(Array(0.0, 0.0, 2.0, 0.0, 2.0)) 45 | .setAlpha(1.0) 46 | .CNNtrain(train_d, opts) 47 | 48 | //4 Test the model 49 | val CNNforecast = CNNmodel.predict(train_d) 50 | val CNNerror = CNNmodel.Loss(CNNforecast) 51 | println(s"CNNerror = $CNNerror.") 52 | val printf1 = CNNforecast.map(f => (f.label.data, f.predict_label.data)).take(200) 53 | println("Predicted values") 54 | for (i <- 0 until printf1.length) { 55 | val outi = printf1(i)._2.mkString("\t") 56 | println(outi) 57 | } 58 | 59 | } 60 | } -------------------------------------------------------------------------------- /src/main/scala/intactprogram/telecomdataprocessing/util/HBaseUtil.scala: -------------------------------------------------------------------------------- 1 | package telecomdataprocessing.util 2 | 3 | import com.ibm.icu.text.CharsetDetector 4 | import org.apache.hadoop.conf.Configuration 5 | import org.apache.hadoop.hbase.HBaseConfiguration 6 | import org.apache.hadoop.hbase.client.Result 7 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 8 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat 9 | import org.apache.spark.SparkContext 10 | import org.apache.spark.rdd.RDD 11 | 12 | import scala.xml.{Elem, XML} 13 | 14 | /** 15 | * Created by li on 16/7/7. 16 | */ 17 | object HBaseUtil { 18 | 19 | /** 20 | * Detects the character encoding of a byte array. 21 | * 22 | * @param html raw page bytes 23 | * @return name of the detected character set 24 | */ 25 | def judgeChaser(html: Array[Byte]): String = { 26 | 27 | val icu4j = new CharsetDetector() 28 | icu4j.setText(html) 29 | val encoding = icu4j.detect() 30 | 31 | encoding.getName 32 | } 33 | 34 | /** 35 | * Loads the XML-format configuration file. 36 | * 37 | * @param dir path of the configuration file 38 | * @return the parsed XML element 39 | * @author Li Yu 40 | * @note rowNum: 2 41 | */ 42 | def readConfigFile(dir: String): Elem = { 43 | 44 | val configFile = XML.loadFile(dir) 45 | 46 | configFile 47 | } 48 | 49 | /** 50 | * Reads the HBase settings from the config file and initializes the HBase configuration. 51 | * 52 | * @param configFile HBase configuration file (parsed XML) 53 | * @return the initialized Configuration 54 | * @author Li Yu 55 | * @note rowNum: 7 56 | */ 57 | def setHBaseConfigure(configFile: Elem): Configuration = { 58 | 59 | val rootDir = (configFile \ "hbase" \ "rootDir").text 60 | val ip = (configFile \ "hbase" \ "ip").text 61 | 62 | // Initialize the configuration 63 | val configuration = HBaseConfiguration.create() 64 | configuration.set("hbase.rootdir", rootDir) 65 | configuration.set("hbase.zookeeper.quorum", ip) 66 | 67 | configuration 68 | } 69 | 70 | /** 71 | * Reads the contents of an HBase table. 72 | * 73 | * @param sc SparkContext 74 | * @param confDir directory containing the configuration file 75 | * @author Li Yu 76 | * @note rowNum: 7 77 | */ 78 | def getHBaseConf(sc: SparkContext, confDir: String, tableName: String) : RDD[(ImmutableBytesWritable, Result)] = { 79 | 80 | val configFile = HBaseUtil.readConfigFile(confDir) 81 | val configuration = HBaseUtil.setHBaseConfigure(configFile) 82 | 83 | configuration.set(TableInputFormat.INPUT_TABLE, tableName) 84 | 85 | // Create an RDD via the Hadoop API 86 | val hBaseRDD = sc.newAPIHadoopRDD(configuration, 87 | classOf[TableInputFormat], 88 | classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], 89 | classOf[org.apache.hadoop.hbase.client.Result]) 90 | 91 | hBaseRDD 92 | } 93 | 94 | } 95 |
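For reference, a minimal usage sketch of the HBaseUtil helpers above (not part of the repository): the config-file path, the table name, the column family/qualifier ("basic"/"content"), and the object name are illustrative assumptions, not values taken from this project.

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}
import telecomdataprocessing.util.HBaseUtil

object HBaseUtilUsageSketch {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setAppName("HBaseUtilUsageSketch").setMaster("local[2]")
    val sc = new SparkContext(conf)

    // Hypothetical config.xml containing <hbase><rootDir>...</rootDir><ip>...</ip></hbase>.
    val confDir = "/path/to/config.xml"
    // Hypothetical table name.
    val tableName = "news_table"

    // Read the table into an RDD of (ImmutableBytesWritable, Result) pairs.
    val hBaseRDD = HBaseUtil.getHBaseConf(sc, confDir, tableName)

    // Decode one column per row, using judgeChaser to detect the character set first.
    val rows = hBaseRDD.map { case (_, result) =>
      val rowKey = Bytes.toString(result.getRow)
      val raw = result.getValue(Bytes.toBytes("basic"), Bytes.toBytes("content")) // assumed family/qualifier
      val content = if (raw == null) "" else new String(raw, HBaseUtil.judgeChaser(raw))
      (rowKey, content)
    }

    rows.take(10).foreach(println)
    sc.stop()
  }
}

A consumer of this sketch would need the Spark, HBase, and Hadoop client dependencies already declared in build.sbt on its classpath.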
-------------------------------------------------------------------------------- /src/main/scala/intactprogram/telecomdataprocessing/util/LoggerUtil.scala: -------------------------------------------------------------------------------- 1 | package telecomdataprocessing.util 2 | 3 | import org.apache.log4j.{BasicConfigurator, Logger} 4 | 5 | /** 6 | * 写Log操作 7 | */ 8 | object LoggerUtil { 9 | 10 | var logger = Logger.getLogger("TelecomData_Processing") 11 | BasicConfigurator.configure() 12 | // PropertyConfigurator.configure("/home/mlearning/tdt/conf/log4j.properties") 13 | 14 | def exception(e: Exception) = { 15 | 16 | logger.error(e.printStackTrace()) 17 | 18 | } 19 | 20 | def error(msg: String): Unit = { 21 | 22 | logger.error(msg) 23 | } 24 | 25 | def warn(msg: String): Unit = { 26 | 27 | logger.warn(msg) 28 | } 29 | 30 | def info(msg: String): Unit = { 31 | 32 | logger.info(msg) 33 | } 34 | 35 | def debug(msg: String): Unit = { 36 | 37 | logger.debug(msg) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/telecomdataprocessingAll/TelecomDataProcess.scala: -------------------------------------------------------------------------------- 1 | //package com.kunyan.dxdataprocess 2 | // 3 | //import java.text.SimpleDateFormat 4 | // 5 | //import org.apache.spark.{SparkConf, SparkContext} 6 | //import util.HBaseUtil 7 | // 8 | //import scala.collection.mutable.ArrayBuffer 9 | // 10 | ///** 11 | // * Created by QQ on 7/25/16. 12 | // */ 13 | //object TelecomDataProcess { 14 | // 15 | // def getDayTimeStamp(startDay: String): Long = { 16 | // 17 | // val sdf = new SimpleDateFormat("yyyy-MM-dd") 18 | // val dayStamp = sdf.parse(startDay).getTime 19 | // 20 | // dayStamp 21 | // } 22 | // 23 | // /** 24 | // * 给定时间范围,根据时间窗口长度,获取若干组时间窗口 25 | // * 26 | // * @param startTime 起始时间戳 27 | // * @param endTime 结束时间戳 28 | // * @param timeRange 事件窗口 29 | // * @return Array[(Long, Long)] 30 | // * @note rowNum:11 31 | // */ 32 | // def makeHourTimeWindows(startTime: Long, endTime: Long, timeRange: Int): Array[(Long, Long)] = { 33 | // 34 | // var count = startTime 35 | // val dayWindows = ArrayBuffer[(Long, Long)]() 36 | // 37 | // do { 38 | // 39 | // // (start, start + timeRange - 1) 40 | // dayWindows.append((count, count + 60L * 60 * 1000 * timeRange - 1)) 41 | // count += 60L * 60 * 1000 42 | // 43 | // } while (count < endTime) 44 | // 45 | // dayWindows.toArray 46 | // } 47 | // 48 | // def judgeTimeWindow(time: Long, timeWindow: Array[(Long, Long)]): (Long, Long) = { 49 | // 50 | // timeWindow.foreach(line => { 51 | // if (time >= line._1 && time <= line._2){ 52 | // return line 53 | // } 54 | // }) 55 | // 56 | // (-1L, -1L) 57 | // } 58 | // 59 | // def urlFormat(url: String): String = { 60 | // 61 | // val temp = url.split("://") 62 | // 63 | // temp.length match { 64 | // case 1 => temp(0).replaceAll("wwww", "") 65 | // case 2 => temp(1).replaceAll("wwww", "") 66 | // } 67 | // } 68 | // 69 | // def main(args: Array[String]) { 70 | // 71 | // val conf = new SparkConf() 72 | // .setAppName(s"Warren_TelecomData_Processing_${args(0)}") 73 | // .set("dfs.replication", "1") 74 | // // .setMaster("local") 75 | // // .set("spark.driver.host","192.168.2.90") 76 | // val sc = new SparkContext(conf) 77 | // 78 | // val jsonConfig = new JsonConfig 79 | // jsonConfig.initConfig(args(1)) 80 | // 81 | // val hbaseConfig = HBaseUtil.getHbaseConf(jsonConfig.getValue("hbase", "rootDir"), 82 | // jsonConfig.getValue("hbase", 
"ips")) 83 | // 84 | // val startDayTimeStamp = getDayTimeStamp(args(0)) 85 | // val endDayTimeStamp = startDayTimeStamp + 24L * 60 * 60 * 1000 86 | // 87 | // // 获取时间窗口 88 | // val timeRanges = sc.broadcast(makeHourTimeWindows(startDayTimeStamp, endDayTimeStamp, 1)) 89 | // 90 | // // 获取电信数据 91 | // val teleData = sc.textFile(jsonConfig.getValue("tp", "telecomDataPath") + s"/${args(0)}}", 92 | // jsonConfig.getValue("tp", "partition").toInt) 93 | // 94 | // // 获取所有需要匹配的,并广播 95 | // val urlsBr = sc.broadcast(HBaseUtil.getRDD(sc, hbaseConfig).map(x => urlFormat(x.split("\n\t")(0))).collect()) // 这一步需要对从其他地方获取到新闻url做一些处理,例如去掉www和http 96 | // 97 | // // 分组计算 98 | // teleData.map(row => { 99 | // val tmp = row.split("\t") 100 | // val url = urlFormat(tmp(3) + tmp(4)) 101 | // val time = tmp(0) 102 | // 103 | // (url, time) 104 | // }).filter(x => urlsBr.value.contains(x._1)).map(row => { 105 | // 106 | // val timeWindow = judgeTimeWindow(row._2.toLong, timeRanges.value) 107 | // 108 | // ((timeWindow._1, timeWindow._2, row._1), 1L) 109 | // }).reduceByKey(_ + _).saveAsTextFile(jsonConfig.getValue("tp", "outputPath") + s"/${args(0)}") 110 | // } 111 | //} 112 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/telecomdataprocessingAll/readFromHdfs.scala: -------------------------------------------------------------------------------- 1 | //package telecomdataprocessingAll 2 | // 3 | //import org.apache.spark.{SparkConf, SparkContext} 4 | //import util.LoggerUtil 5 | // 6 | ///** 7 | // * Created by li on 16/7/27. 8 | // */ 9 | //object readFromHdfs { 10 | // 11 | // def main(args: Array[String]) { 12 | // 13 | // val conf = new SparkConf().setAppName("Warren_ReadFrom_Hdfs_filter") 14 | // 15 | // val sc = new SparkContext(conf) 16 | // 17 | // val hdfsDir = args(0) 18 | //// val hdfsDir = "hdfs://222.73.57.12:9000/telecom/shdx/origin/data/" 19 | // 20 | // val setTime = args(1) 21 | //// val setTime = "2016-07-23" 22 | // 23 | // 24 | // val time = System.currentTimeMillis() 25 | // 26 | // LoggerUtil.warn("time2Start:" +"%s".format(time)+ " 》》》》》》》》》》》》") 27 | // // 数据获取开始和截止时间 28 | // val stopTimeStamp = TDP.getDayTimeStamp(setTime) 29 | // val startTimeStamp = stopTimeStamp - 24 * 60 * 60 * 1000 30 | // val timeRanges = sc.broadcast(TDP.makeHourTimeWindows(startTimeStamp, stopTimeStamp -1, 1)) 31 | // 32 | // // 23个新闻网站的host域名 33 | // val urlUnion = Array("yicai.com", "21cn.com", "d.weibo.com","xueqiu.com","10jqka.com.cn","gw.com.cn", 34 | // "eastmoney.com","p5w.net","stockstar.com","hexun.com","caijing.com.cn","jrj.com.cn","cfi.net.cn","cs.com.cn", 35 | // "cnstock.com", "stcn.com","news.cn","finance.ifeng.com","finance.sina.com.cn","business.sohu.com","money.163.com", 36 | // "wallstreetcn.com","finance.qq.com","moer.jiemian.com","www.szse.cn","weixin.sogou.com","sse.com.cn","zqyjbg.com") 37 | // 38 | // val dataFromHDFS2 = sc.textFile(hdfsDir + setTime + "/*") 39 | // .filter(! _.contains("home/telecom")) 40 | // .filter(! 
_.contains("youchaojiang")) 41 | // .map(_.split("\t")) 42 | // .filter(_.length == 8) 43 | // .filter(x => urlUnion.contains(TDP.urlFormat(x(3)))) 44 | // .map(x => (TDP.urlFormat(x(3) + x(4)), x(0))) 45 | // 46 | // val result = dataFromHDFS2.map(row => { 47 | // 48 | // val timeWindow = TDP.judgeTimeWindow(row._2.toLong, timeRanges.value) 49 | // 50 | // ((timeWindow._1, timeWindow._2, row._1), 1L) 51 | // }).reduceByKey(_ + _).count() 52 | // 53 | // println(result) 54 | // 55 | // 56 | // LoggerUtil.warn("time2End:" +"%s".format(time)+ " 》》》》》》》》》》》》") 57 | // 58 | // } 59 | // 60 | //} 61 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/telecomdataprocessingAll/util/HDFSUtil.scala: -------------------------------------------------------------------------------- 1 | package telecomdataprocessingAll.util 2 | 3 | import java.text.SimpleDateFormat 4 | 5 | /** 6 | * Created by li on 16/7/25. 7 | */ 8 | object HDFSUtil { 9 | 10 | 11 | def main(args: Array[String]) { 12 | val dataFormat = new SimpleDateFormat("yyyy-MM-dd") 13 | val startTime = dataFormat.parse("2012-12-12") 14 | val startTimeStamp = startTime.getTime 15 | val stopTimeStamp = startTime.getTime - 24 * 60 * 60 * 1000 -1 16 | 17 | 18 | println(startTimeStamp, stopTimeStamp) 19 | } 20 | 21 | 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/telecomdataprocessingAll/util/LoggerUtil.scala: -------------------------------------------------------------------------------- 1 | package telecomdataprocessingAll.util 2 | 3 | import org.apache.log4j.{BasicConfigurator, Logger} 4 | 5 | /** 6 | * 写Log操作 7 | */ 8 | object LoggerUtil { 9 | 10 | var logger = Logger.getLogger("TelecomData_Processing") 11 | BasicConfigurator.configure() 12 | // PropertyConfigurator.configure("/home/mlearning/tdt/conf/log4j.properties") 13 | 14 | def exception(e: Exception) = { 15 | 16 | logger.error(e.printStackTrace()) 17 | 18 | } 19 | 20 | def error(msg: String): Unit = { 21 | 22 | logger.error(msg) 23 | } 24 | 25 | def warn(msg: String): Unit = { 26 | 27 | logger.warn(msg) 28 | } 29 | 30 | def info(msg: String): Unit = { 31 | 32 | logger.info(msg) 33 | } 34 | 35 | def debug(msg: String): Unit = { 36 | 37 | logger.debug(msg) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/telecomdataprocessingAll/util/TimeUtil.scala: -------------------------------------------------------------------------------- 1 | package telecomdataprocessingAll.util 2 | 3 | import java.math.BigInteger 4 | import java.text.SimpleDateFormat 5 | import java.util.{Calendar, Date} 6 | 7 | import org.apache.hadoop.hbase.client.Scan 8 | import org.apache.hadoop.hbase.protobuf.ProtobufUtil 9 | import org.apache.hadoop.hbase.protobuf.generated.ClientProtos 10 | import org.apache.hadoop.hbase.util.Base64 11 | 12 | /** 13 | * Created by C.J.YOU on 2016/1/13. 
14 | * 格式化时间的工具类 15 | */ 16 | object TimeUtil { 17 | 18 | def getTime(timeStamp: String): String = { 19 | val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss") 20 | val bigInt: BigInteger = new BigInteger(timeStamp) 21 | val date: String = sdf.format(bigInt) 22 | date 23 | } 24 | 25 | def getDay: String = { 26 | val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd") 27 | val date: String = sdf.format(new Date) 28 | date 29 | } 30 | 31 | def getCurrentHour: Int = { 32 | val calendar = Calendar.getInstance 33 | calendar.setTime(new Date) 34 | calendar.get(Calendar.HOUR_OF_DAY) 35 | } 36 | 37 | def getPreHourStr: String = { 38 | val date = new Date(new Date().getTime - 60 * 60 * 1000) 39 | val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd-HH") 40 | sdf.format(date) 41 | } 42 | 43 | /** 44 | * 获取今天的日期 45 | * 46 | * @return 47 | */ 48 | def getNowDate(): String = { 49 | val now: Date = new Date() 50 | val dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd") 51 | val res = dateFormat.format( now ) 52 | res 53 | } 54 | 55 | 56 | /** 57 | * 获取本周的开始时间 58 | */ 59 | def Null(){ 60 | 61 | } 62 | 63 | /** 64 | * 获取本月的开始时间 65 | * http://blog.csdn.net/springlustre/article/details/47273353 66 | */ 67 | 68 | 69 | /** 70 | * 设置时间范围 71 | * 72 | * @return 时间范围 73 | * @author yangshuai 74 | */ 75 | def setTimeRange(): String = { 76 | 77 | val scan = new Scan() 78 | val date = new Date(new Date().getTime - 30 * 24 * 60 * 60 * 1000) 79 | val format = new SimpleDateFormat("yyyy-MM-dd HH") 80 | val time = format.format(date) 81 | val time1 = format.format(new Date().getTime) 82 | val startTime = time + "-00-00" 83 | val stopTime = time1 + "-00-00" 84 | val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH-mm-ss") 85 | val startRow: Long = sdf.parse(startTime).getTime 86 | val stopRow: Long = sdf.parse(stopTime).getTime 87 | 88 | scan.setTimeRange(startRow, stopRow) 89 | val proto: ClientProtos.Scan = ProtobufUtil.toScan(scan) 90 | 91 | Base64.encodeBytes(proto.toByteArray) 92 | } 93 | 94 | /** 95 | * 设置制定的时间范围(一天) 96 | * @param time 指定的日期 97 | * @return 指定日期至前一天时间范围 98 | */ 99 | def setAssignedTimeRange(time: String): String = { 100 | 101 | val format = new SimpleDateFormat("yyyy-MM-dd") 102 | 103 | val date = format.parse(time) 104 | 105 | val endTime = new Date(date.getTime - 24 * 60 * 60 * 1000) 106 | 107 | val stopTime = format.format(endTime) 108 | 109 | val startDate = time + "-00-00-00" 110 | val stopDate = stopTime + "-00-00-00" 111 | 112 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH-mm-ss") 113 | val startRaw = sdf.parse(startDate).getTime 114 | val stopRaw = sdf.parse(stopDate).getTime 115 | 116 | val scan = new Scan() 117 | scan.setTimeRange(startRaw, stopRaw) 118 | 119 | val proto = ProtobufUtil.toScan(scan) 120 | 121 | Base64.encodeBytes(proto.toByteArray) 122 | } 123 | 124 | 125 | } 126 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/vipstockstatistic/CorpusBuild.scala: -------------------------------------------------------------------------------- 1 | package dataprocess.vipstockstatistic 2 | 3 | import com.kunyandata.nlpsuit.util.{TextPreprocessing, KunyanConf} 4 | import org.apache.spark.rdd.RDD 5 | 6 | import scala.xml.XML 7 | 8 | /** 9 | * Created by li on 2016/8/23. 
10 | * 调用坤雁分词系统 11 | */ 12 | object CorpusBuild { 13 | 14 | /** 15 | * 配置文件初始化 16 | * 17 | * @param xmlConfPath 配置文件输入路径 18 | * @return 初始化后的配置文件 19 | * @author Li Yu 20 | * @note rowNum = 6 21 | */ 22 | def paramInit(xmlConfPath: String): KunyanConf = { 23 | 24 | val kunyanConf = new KunyanConf 25 | val confFile = XML.loadFile(xmlConfPath) 26 | 27 | val kunyanHost = { confFile \ "kunyan" \ "kunyanHost" }.text 28 | val kunyanPort = { confFile \ "kunyan" \ "kunyanPort" }.text.toInt 29 | kunyanConf.set(kunyanHost, kunyanPort) 30 | 31 | kunyanConf 32 | } 33 | 34 | /** 35 | * 分词程序 36 | * 37 | * @param xmlPath 主程序输入参数 38 | * @author Li Yu 39 | * @note rownum = 6 40 | */ 41 | def run(xmlPath: String, news: RDD[Array[String]]): RDD[(String, String)] = { 42 | 43 | System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") 44 | 45 | // 配置文件初始化 46 | val kunyanConf = paramInit(xmlPath) 47 | 48 | // 调用分词系统,输出内容为URL 分词结果 49 | val stopWords = Array(" ") 50 | val corpus = news.map(row => (row(2), TextPreprocessing.process(row(3), stopWords, kunyanConf).mkString(","))) 51 | 52 | corpus 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/vipstockstatistic/util/AnsjAnalyzer.scala: -------------------------------------------------------------------------------- 1 | package dataprocess.vipstockstatistic.util 2 | 3 | import org.ansj.library.UserDefineLibrary 4 | import org.ansj.splitWord.analysis.{NlpAnalysis, ToAnalysis} 5 | import org.apache.spark.SparkContext 6 | 7 | 8 | /** 9 | * Created by zhangxin on 2016/3/8 10 | * 基于ansj的分词工具 11 | */ 12 | object AnsjAnalyzer { 13 | 14 | /** 15 | * ansj分词器初始化, 添加用户词典 16 | * 17 | * @param sc spark程序入口 18 | * @param userDic 用户词典数组 19 | * @return 无 20 | * @author zhangxin 21 | */ 22 | def init(sc: SparkContext, userDic: Array[String]): Unit = { 23 | 24 | if(userDic != null ){ 25 | userDic.foreach(addUserDic(_, sc)) 26 | } 27 | 28 | } 29 | 30 | /** 31 | * 添加用户词典到分词器 32 | * 33 | * @param dicPath 词典路径 34 | * @param sc spark程序入口 35 | * @return 无 36 | * @author zhangxin 37 | */ 38 | def addUserDic(dicPath: String, sc: SparkContext): Unit = { 39 | 40 | //读取词典 41 | val dic = sc.textFile(dicPath).collect() 42 | 43 | //添加到ansj中 44 | dic.foreach(UserDefineLibrary.insertWord(_, "userDefine", 100)) 45 | 46 | } 47 | 48 | /** 49 | * 标准分词 ,无词性标注 50 | * 51 | * @param sentence 待分词语句 52 | * @return 分词结果 53 | * @author zhangxin 54 | */ 55 | def cutNoTag(sentence: String): Array[String] = { 56 | 57 | //切词 58 | val sent = ToAnalysis.parse(sentence) 59 | 60 | //提取分词结果,过滤词性 61 | val words = for(i <- Range(0, sent.size())) yield sent.get(i).getName 62 | 63 | words.toArray 64 | } 65 | 66 | /** 67 | * 自然语言分词,带词性标注 68 | * 69 | * @param sentence 待分词句子 70 | * @return 分词结果 71 | * @author zhangxin 72 | */ 73 | def cutWithTag(sentence: String):Array[String]={ 74 | 75 | // 切词 76 | val sent = NlpAnalysis.parse(sentence) 77 | 78 | // 提取分词结果 79 | val words= for(i <- Range(0, sent.size())) yield sent.get(i).getName 80 | 81 | words.toArray 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/vipstockstatistic/util/HBaseUtil.scala: -------------------------------------------------------------------------------- 1 | package dataprocess.vipstockstatistic.util 2 | 3 | import java.text.SimpleDateFormat 4 | import com.ibm.icu.text.CharsetDetector 5 | import org.apache.hadoop.conf.Configuration 6 | import 
org.apache.hadoop.hbase.HBaseConfiguration 7 | import org.apache.hadoop.hbase.client.{Result, Scan} 8 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 9 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat 10 | import org.apache.hadoop.hbase.protobuf.ProtobufUtil 11 | import org.apache.hadoop.hbase.protobuf.generated.ClientProtos 12 | import org.apache.hadoop.hbase.util.Base64 13 | import org.apache.spark.SparkContext 14 | import org.apache.spark.rdd.RDD 15 | import scala.xml.{Elem, XML} 16 | 17 | /** 18 | * Created by li on 16/7/7. 19 | */ 20 | object HBaseUtil { 21 | 22 | /** 23 | * 设置时间范围 24 | * 25 | * @return 时间范围 26 | * @author yangshuai 27 | */ 28 | def setTimeRange(startDay: String): String = { 29 | 30 | val scan = new Scan() 31 | 32 | val sdf = new SimpleDateFormat("yyyy-MM-dd") 33 | val startRow = sdf.parse(startDay).getTime 34 | val stopRow = startRow + 24 * 60 * 60 * 1000 - 1 35 | 36 | scan.setTimeRange(startRow, stopRow) 37 | val proto: ClientProtos.Scan = ProtobufUtil.toScan(scan) 38 | 39 | Base64.encodeBytes(proto.toByteArray) 40 | } 41 | 42 | /** 43 | * 识别字符编码 44 | * 45 | * @param html 地址编码 46 | * @return 字符编码 47 | */ 48 | def judgeChaser(html: Array[Byte]): String = { 49 | 50 | val icu4j = new CharsetDetector() 51 | icu4j.setText(html) 52 | val encoding = icu4j.detect() 53 | 54 | encoding.getName 55 | } 56 | 57 | /** 58 | * 获取xml格式的配置文件 59 | * 60 | * @param dir 配置文件所在的文件目录 61 | * @return 62 | * @return Li Yu 63 | * @note rowNum: 2 64 | */ 65 | def readConfigFile(dir: String): Elem = { 66 | 67 | val configFile = XML.loadFile(dir) 68 | 69 | configFile 70 | } 71 | 72 | /** 73 | * 获取hbase配置内容,并且初始化hbase配置 74 | * 75 | * @param configFile hbase配置文件 76 | * @return 77 | * @return Li Yu 78 | * @note rowNum: 7 79 | */ 80 | def setHBaseConfigure(configFile: Elem): Configuration = { 81 | 82 | val rootDir = (configFile \ "hbase" \ "rootDir").text 83 | val ip = (configFile \ "hbase" \ "ip").text 84 | 85 | // 初始化配置 86 | val configuration = HBaseConfiguration.create() 87 | configuration.set("hbase.rootdir", rootDir) 88 | configuration.set("hbase.zookeeper.quorum", ip) 89 | 90 | configuration 91 | } 92 | 93 | /** 94 | * 获取hbase中的内容 95 | * 96 | * @param sc SparkContext 97 | * @param confDir 配置文件所在的文件夹 98 | * @author Li Yu 99 | * @note rowNum: 7 100 | */ 101 | def getHBaseConf(sc: SparkContext, confDir: String, tableName: String) : RDD[(ImmutableBytesWritable, Result)] = { 102 | 103 | val configFile = readConfigFile(confDir) 104 | val configuration = setHBaseConfigure(configFile) 105 | 106 | configuration.set(TableInputFormat.INPUT_TABLE, tableName) 107 | // configuration.set(TableInputFormat.SCAN, timeRange) 108 | 109 | // 使用Hadoop api来创建一个RDD 110 | val hBaseRDD = sc.newAPIHadoopRDD(configuration, 111 | classOf[TableInputFormat], 112 | classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], 113 | classOf[org.apache.hadoop.hbase.client.Result]) 114 | 115 | hBaseRDD 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/vipstockstatistic/util/LoggerUtil.scala: -------------------------------------------------------------------------------- 1 | package dataprocess.vipstockstatistic.util 2 | 3 | import org.apache.log4j.{BasicConfigurator, Logger} 4 | 5 | /** 6 | * 写Log操作 7 | */ 8 | object LoggerUtil { 9 | 10 | var logger = Logger.getLogger("Warren_VipStockStatistic_Processing") 11 | BasicConfigurator.configure() 12 | // 
PropertyConfigurator.configure("/home/alg/telecomdataprocess/conf/log4j.properties") 13 | 14 | def exception(e: Exception) = { 15 | 16 | logger.error(e.printStackTrace()) 17 | 18 | } 19 | 20 | def error(msg: String): Unit = { 21 | 22 | logger.error(msg) 23 | } 24 | 25 | def warn(msg: String): Unit = { 26 | 27 | logger.warn(msg) 28 | } 29 | 30 | def info(msg: String): Unit = { 31 | 32 | logger.info(msg) 33 | } 34 | 35 | def debug(msg: String): Unit = { 36 | 37 | logger.debug(msg) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/vipstockstatistic/util/RedisUtil.scala: -------------------------------------------------------------------------------- 1 | package dataprocess.vipstockstatistic.util 2 | 3 | import redis.clients.jedis.Jedis 4 | 5 | import scala.xml.XML 6 | 7 | /** 8 | * Created by li on 16/8/23. 9 | */ 10 | object RedisUtil { 11 | 12 | var jedis: Jedis = null 13 | 14 | /** 15 | * 初始化 redis 16 | * 17 | * @param confDir 配置文件对应的 xml 对象 18 | * @note rowNum: 10 19 | */ 20 | def initRedis(confDir: String): Jedis = { 21 | 22 | val configFile = XML.loadFile(confDir) 23 | 24 | val redisIp = (configFile \ "redis" \ "ip").text 25 | val redisPort = (configFile \ "redis" \ "port").text.toInt 26 | val redisDB = (configFile \ "redis" \ "db").text.toInt 27 | val redisAuth = (configFile \ "redis" \ "auth").text 28 | 29 | jedis = new Jedis(redisIp, redisPort) 30 | jedis.auth(redisAuth) 31 | jedis.select(redisDB) 32 | 33 | jedis 34 | } 35 | 36 | /** 37 | * 将结果保存到redis 38 | * 39 | * @param resultData 需要保存的数据 40 | * @author Li Yu 41 | * @note rowNum: 12 42 | */ 43 | def write2Redis(resultData: Array[(String, String)], time: String, dataType: String, confDir: String): Unit = { 44 | 45 | val jedis = initRedis(confDir) 46 | 47 | resultData.foreach{ x => { 48 | 49 | jedis.zadd(s"vipstockstatistic_$dataType" + s"_$time", x._2.toDouble, x._1) 50 | }} 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/intactprogram/vipstockstatistic/util/config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hdfs://61.147.114.85:9000/hbase 6 | slave1,slave2,slave3 7 | 8 | 9 | 10 | hdfs://61.147.114.85:9000 11 | 12 | 13 | 14 | 61.147.114.88 15 | 16003 16 | 17 | 18 | 19 | 61.147.114.72 20 | 6666 21 | db9 22 | backtest 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/Recommendation/SparkMLlibColbFilter.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.Recommendation 2 | import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating} 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | 5 | 6 | /** 7 | * Created by li on 2017/3/23. 
8 | * 协同过滤ALS算法推荐过程如下: 9 | * 加载数据到 ratings RDD,每行记录包括:user, product, rate 10 | * 从 ratings 得到用户商品的数据集:(user, product) 11 | * 使用ALS对 ratings 进行训练 12 | * 通过 model 对用户商品进行预测评分:((user, product), rate) 13 | * 从 ratings 得到用户商品的实际评分:((user, product), rate) 14 | * 合并预测评分和实际评分的两个数据集,并求均方差 15 | */ 16 | 17 | object SparkMLlibColbFilter { 18 | 19 | def main(args: Array[String]) { 20 | val conf = new SparkConf().setAppName("Java Collaborative Filtering Example").setMaster("local") 21 | val sc = new SparkContext(conf) 22 | 23 | // Load and parse the data 24 | val path = "file:///data/hadoop/spark-2.0.0-bin-hadoop2.7/data/mllib/als/test.data" 25 | val data = sc.textFile(path) 26 | val ratings = data.map(_.split(",") match { case Array(user, item, rate) => 27 | Rating(user.toInt, item.toInt, rate.toDouble) 28 | }) 29 | 30 | // Build the recommendation model using ALS 31 | val rank = 10 32 | val numIterations = 10 33 | val model = ALS.train(ratings, rank, numIterations, 0.01) 34 | 35 | // Evauate the model on rating data 36 | val usersProducts = ratings.map { case Rating(user, product, rate) => 37 | (user, product) 38 | } 39 | 40 | val predictions = 41 | model.predict(usersProducts).map { case Rating(user, product, rate) => 42 | ((user, product), rate) 43 | } 44 | 45 | val ratesAndPreds = ratings.map { case Rating(user, product, rate) => 46 | ((user, product), rate) 47 | }.join(predictions) 48 | 49 | val MSE = ratesAndPreds.map { case ((user, product), (r1, r2)) => 50 | val err = r1 - r2 51 | err * err 52 | }.mean() 53 | 54 | System.out.println("Mean Squared Error = " + MSE) 55 | 56 | // Save and load model 57 | model.save(sc, "target/tmp/myCollaborativeFilter") 58 | val sameModel = MatrixFactorizationModel.load(sc, "target/tmp/myCollaborativeFilter") 59 | 60 | 61 | //为每个用户进行推荐,推荐的结果可以以用户id为key,结果为value存入redis或者hbase中 62 | val users = data.map(_.split(",")(0)).distinct().collect() 63 | 64 | for (elem <- users) { 65 | 66 | val res = model.recommendProducts(elem.toInt, numIterations) 67 | res.foreach(itm => (itm.user, itm.product, itm.rating)) 68 | } 69 | } 70 | } -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/classification/BinaryClassificationWithALS.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.classification 2 | 3 | import org.apache.spark.ml.feature.{HashingTF, IDF, StopWordsRemover, Tokenizer} 4 | import org.apache.spark.mllib.linalg.{Vector, Vectors} 5 | import org.apache.spark.mllib.regression.LabeledPoint 6 | import org.apache.spark.sql.{Row, SQLContext} 7 | import org.apache.spark.{SparkConf, SparkContext} 8 | 9 | import scala.io.Source 10 | 11 | 12 | 13 | /** 14 | * Created by li on 16/4/8. 
15 | */ 16 | object BinaryClassificationWithALS { 17 | 18 | val conf = new SparkConf().setMaster("local").setAppName("StopWordRemove") 19 | val sc = new SparkContext(conf) 20 | val sqlContext = new SQLContext(sc) 21 | // val hivecontext = new HiveContext(sc) 22 | import sqlContext.implicits._ 23 | 24 | 25 | // DataFrame type 数据集导入 26 | // val src = Source.fromFile("/users/li/Intellij/Native-Byes/nativebyes/wordseg_881156.txt").getLines().toArray 27 | 28 | // 总数据集获取未平衡 29 | // case class RawDataRecord( category: String ,labels: Double ,text: String) 30 | // 31 | // val src = Source.fromFile("/Users/li/Downloads/traningset/HGHQ.txt").getLines().toArray.map{ 32 | // line => 33 | // val data = line.split("\t") 34 | // RawDataRecord(data(1),data(0).toDouble,data(2)) 35 | // } 36 | 37 | 38 | // // 平衡数据集获取 39 | case class RawDataRecord(labels: Double ,text: String) 40 | val src = sc.textFile("/Users/li/Downloads/trainingSets/保险").map{ 41 | line => 42 | val data = line.split("\t") 43 | RawDataRecord(data(0).toDouble, data(1)) 44 | } 45 | 46 | 47 | val srcDF = sqlContext.createDataFrame(src) 48 | 49 | 50 | // RDD type 51 | // val srcRDD = sc.textFile("/users/li/Intellij/Native-Byes/nativebyes/wordseg_881156.txt").map { 52 | // x => 53 | // val data = x.split("\t") 54 | // RawDataRecord(data(0),data(1),labels = if(data(1) == "881108" ) 1.0 else 0.0, data(2)) 55 | // }.toDF()//to DataFrame 56 | 57 | var tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words") 58 | var wordsData = tokenizer.transform(srcDF) 59 | 60 | // 去停用词 61 | // 读取停用词表 62 | // val filter = Source.fromFile("/users/li/Intellij/Native-Byes/nativebyes/1.txt" ).getLines().toArray 63 | val filter = Source.fromFile("/users/li/Intellij/Native-Byes/nativebyes/stop_words_CN" ).getLines().toArray 64 | 65 | val remover = new StopWordsRemover() 66 | .setInputCol("words") 67 | .setOutputCol("filtered") 68 | .setStopWords(filter) 69 | 70 | val removeword = remover.transform(wordsData) 71 | 72 | 73 | //70%作为训练数据,30%作为测试数据 74 | val splits = removeword.randomSplit(Array(0.7, 0.3),seed = 11L) 75 | //splits.foreach(println) 76 | var trainingDF = splits(0) 77 | var testDF = splits(1) 78 | 79 | 80 | 81 | //使用hashingTF计算每个词在文档中的词频 82 | val hashingTF = new HashingTF().setNumFeatures(2000).setInputCol("filtered").setOutputCol("rawFeatures") 83 | val featurizedData = hashingTF.transform(trainingDF) 84 | // println("output2:") 85 | // featurizedData.select($"category", $"words", $"rawFeatures").foreach(println) 86 | // featurizedData.show() 87 | 88 | 89 | //计算每个词的TF-IDF 90 | var idf = new IDF().setInputCol("rawFeatures").setOutputCol("features") 91 | val idfModel = idf.fit(featurizedData) 92 | var rescaledData = idfModel.transform(featurizedData) 93 | // println("output3:") 94 | // rescaledData.select($"category", $"features").foreach(println) 95 | // rescaledData.select($"labels",$"features").show() 96 | 97 | 98 | // 转换成Bayes的输入格式 99 | var trainDataRdd = rescaledData.select($"labels",$"features").map { 100 | case Row(label: Double, features: Vector) => 101 | LabeledPoint(label , Vectors.dense(features.toArray)) 102 | }.cache() 103 | 104 | //trainDataRdd.foreach(println) 105 | 106 | 107 | /** ALS降维 */ 108 | // val pca = new PCA(trainDataRdd.first().features.size/2).fit(trainDataRdd.map(_.features)) 109 | // val als = new ALSModel() 110 | // val pcl = new ALS().setNonnegative(true).setMaxIter(100).fit(trainDataRdd.map(_.features)) 111 | 112 | 113 | 114 | 115 | 116 | 117 | } 118 | 
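BinaryClassificationWithALS above stops after building the TF-IDF LabeledPoints and never trains or evaluates a classifier. A hedged continuation sketch is shown below; it reuses trainDataRdd, testDF, hashingTF, idfModel and the imports already defined in that file, adds only the NaiveBayes import, and leaves the commented ALS/PCA step out. The choice of NaiveBayes mirrors the commented-out line in the file and is an assumption, not the author's final model.

```scala
// continuation sketch: assumes the names defined in BinaryClassificationWithALS above
import org.apache.spark.mllib.classification.NaiveBayes

// train a multinomial NaiveBayes model on the TF-IDF LabeledPoints
val model = NaiveBayes.train(trainDataRdd, lambda = 1.0, modelType = "multinomial")

// apply the same TF and IDF transforms to the held-out 30% split
val testFeaturized = idfModel.transform(hashingTF.transform(testDF))

val testDataRdd = testFeaturized.select($"labels", $"features").map {
  case Row(label: Double, features: Vector) =>
    LabeledPoint(label, Vectors.dense(features.toArray))
}

// compare predictions against the true labels
val predictionAndLabel = testDataRdd.map(p => (model.predict(p.features), p.label))
val accuracy = predictionAndLabel.filter(x => x._1 == x._2).count().toDouble / testDataRdd.count()
println(s"testAccuracy = $accuracy")
```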
-------------------------------------------------------------------------------- /src/main/scala/meachinelearning/classification/PCAtest.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.classification 2 | 3 | import org.apache.spark.mllib.feature.PCA 4 | import org.apache.spark.mllib.linalg.Vectors 5 | import org.apache.spark.mllib.regression.{LinearRegressionWithSGD, LabeledPoint} 6 | import org.apache.spark.{SparkConf, SparkContext} 7 | 8 | 9 | /** 10 | * Created by li on 16/4/7. 11 | */ 12 | object PCAtest extends App{ 13 | 14 | val conf = new SparkConf().setAppName("test").setMaster("local") 15 | val sc = new SparkContext(conf) 16 | 17 | val data = sc.textFile("/Users/li/Downloads/lpsa.data").map { line => 18 | val parts = line.split(',') 19 | LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble))) 20 | }.cache() 21 | 22 | 23 | 24 | 25 | val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L) 26 | val training = splits(0).cache() 27 | val test = splits(1) 28 | 29 | // training.foreach(println) 30 | // println(training.first()) 31 | // println(training.first().features.size/2) 32 | 33 | 34 | val pca = new PCA(training.first().features.size/2).fit(data.map(_.features)) 35 | 36 | val training_pca = training.map(p => p.copy(features = pca.transform(p.features))) 37 | val test_pca = test.map(p => p.copy(features = pca.transform(p.features))) 38 | 39 | val numIterations = 100 40 | val model = LinearRegressionWithSGD.train(training, numIterations) 41 | val model_pca = LinearRegressionWithSGD.train(training_pca, numIterations) 42 | 43 | val valuesAndPreds = test.map { point => 44 | val score = model.predict(point.features) 45 | (score, point.label) 46 | } 47 | 48 | val valuesAndPreds_pca = test_pca.map { point => 49 | val score = model_pca.predict(point.features) 50 | (score, point.label) 51 | } 52 | 53 | val MSE = valuesAndPreds.map{case(v, p) => math.pow((v - p), 2)}.mean() 54 | val MSE_pca = valuesAndPreds_pca.map{case(v, p) => math.pow((v - p), 2)}.mean() 55 | 56 | println("Mean Squared Error = " + MSE) 57 | println("PCA Mean Squared Error = " + MSE_pca) 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/correlationanalysis/correlationAnalysis.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.correlationanalysis 2 | 3 | /** 4 | * Created by li on 16/7/5. 5 | */ 6 | object correlationAnalysis { 7 | 8 | } 9 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/hotdegreecalculate/CommunityFrequencyStatistics.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.hotdegreecalculate 2 | 3 | import org.apache.spark.rdd.RDD 4 | 5 | import scala.collection.mutable 6 | 7 | /** 8 | * Created by li on 16/7/5. 
9 | * 计算社区热度的功能模块, 揉合了社区热词的热度计算, 10 | */ 11 | object CommunityFrequencyStatistics { 12 | 13 | 14 | /** 15 | * 筛选出出现了社区内词的所有文章 16 | * 17 | * @param communityWords 社区中的词 18 | * @param textWords 新闻 19 | * @return Boolean 新闻中存在社区中的词返回true 20 | * @author Li Yu 21 | * @note rowNum: 11 22 | */ 23 | def filterFunc(communityWords: Array[String], 24 | textWords: Array[String]): Boolean = { 25 | 26 | communityWords.foreach { 27 | word => { 28 | 29 | if (textWords.contains(word)) { 30 | 31 | return true 32 | } 33 | } 34 | } 35 | 36 | false 37 | } 38 | 39 | /** 40 | * 统计当前文档库中, 包含社区中提取的关键词的文档数,重复的根据文本ID(url)合并, 41 | * 特别针对社区(事件)词, 一个社区中包含若干个词, 并且词变化后对应的社区却没有变化. 42 | * 43 | * @param fileList 当前文档 44 | * @param communityWordList textRank提取的每个社区的关键词 45 | * @return [社区ID, 包含社区中关键词的文档总数]包含社区中关键词的文档总数 46 | * @author Li Yu 47 | * @note rowNum: 13 48 | */ 49 | def communityFrequencyStatisticsRDD(fileList: RDD[Array[String]], 50 | communityWordList: Array[(String, Array[String])]): Array[(String, Double)] = { 51 | 52 | val communityList = new mutable.HashMap[String, Double] 53 | 54 | communityWordList.foreach { 55 | community => { 56 | 57 | val communityID = community._1 58 | val communityWords = community._2 59 | val temp = fileList.filter(content => filterFunc(communityWords, content)).count().toDouble 60 | 61 | communityList.+=((communityID, temp)) 62 | } 63 | } 64 | 65 | communityList.toArray 66 | } 67 | 68 | 69 | /** 70 | * 统计当前文档库中, 包含社区中提取的关键词的文档数,重复的根据文本ID(url)合并, 71 | * 特别针对社区(事件)词, 一个社区中包含若干个词, 并且词变化后对应的社区却没有变化. 72 | * 73 | * @param fileList 当前文档 74 | * @param communityWordList textRank提取的每个社区的关键词 75 | * @return [社区ID, 包含社区中关键词的文档总数]包含社区中关键词的文档总数 76 | * @author Li Yu 77 | * @note rowNum: 22 78 | */ 79 | def communityFrequencyStatistics(fileList: Array[(String, Array[String])], 80 | communityWordList: Array[(String, Array[String])]): Array[(String, Double)] = { 81 | 82 | val communityList = new mutable.HashMap[String, Double] 83 | 84 | communityWordList.foreach { 85 | line => { 86 | 87 | val item = new mutable.ArrayBuffer[String] 88 | val communityId = line._1 89 | val communityWords = line._2 90 | 91 | fileList.foreach { 92 | file => { 93 | 94 | val fileId = file._1 95 | val fileWordsList = file._2.distinct 96 | 97 | communityWords.foreach { word => { 98 | 99 | if (fileWordsList.contains(word)) item.append(fileId) 100 | } 101 | 102 | communityList.put(communityId, item.distinct.length) 103 | } 104 | } 105 | } 106 | } 107 | } 108 | 109 | communityList.toArray 110 | } 111 | 112 | } 113 | 114 | 115 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/hotdegreecalculate/fileIO.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.hotdegreecalculate 2 | 3 | import java.io.{File, PrintWriter} 4 | 5 | import _root_.util.TimeUtil 6 | 7 | import scala.collection.mutable 8 | import scala.io.Source 9 | 10 | /** 11 | * Created by li on 16/7/11. 12 | * 计算社区热度的过程中,分别将计算的结果保存到本地的文件系统中, 以及从本地文件中读取前一小时的数据 13 | */ 14 | object fileIO { 15 | 16 | /** 将结果保存到本地,将每小时数据保存为一个txt文件,一天的数据保存在一个文件夹里. 
17 | * 18 | * @param dir 文件保存的目录 19 | * @param result 20 | */ 21 | def saveAsTextFile(dir: String, result: Array[(String, Double)]): Unit ={ 22 | 23 | val day = TimeUtil.getDay 24 | val hour = TimeUtil.getCurrentHour 25 | 26 | val writer = new PrintWriter(new File(dir +"%s".format(day) + "-" + "%s".format(hour) + ".txt")) 27 | 28 | for (line <- result) { 29 | 30 | writer.write(line._1 + "\t" + line._2 + "\n") 31 | 32 | } 33 | 34 | writer.close() 35 | } 36 | 37 | 38 | /** 39 | * 读取当前时间前一个小时的数据,读取本地文件中的结果. 40 | * 41 | * @param dir 数据保存的目录 42 | * @return 43 | */ 44 | def readFromFile(dir: String): Array[(String, Double)] ={ 45 | 46 | val date = TimeUtil.getPreHourStr 47 | val res = new mutable.ArrayBuffer[(String, Double)] 48 | 49 | if (Source.fromFile(dir + "%s".format(date) + ".txt" ) != null) { 50 | val temp = Source.fromFile(dir + "%s".format(date) + ".txt" ) 51 | 52 | temp.getLines().foreach{ 53 | line =>{ 54 | val temp = line.split("\t") 55 | res.+=((temp(0), temp(1).toDouble)) 56 | } 57 | } 58 | } else { 59 | 60 | res.+=(("init", 0.0)) 61 | } 62 | 63 | res.toArray 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/textrank/AbstractExtract.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.textrank 2 | 3 | import org.graphstream.graph.implementations.SingleGraph 4 | 5 | import scala.collection.mutable.ListBuffer 6 | 7 | /** 8 | * Created by li on 16/6/23. 9 | */ 10 | class AbstractExtract (val graphName: String, val segWord: ListBuffer[ListBuffer[(String)]] ){ 11 | 12 | var graph = new SingleGraph(graphName) 13 | 14 | // 获取文本网络的句子节点 15 | segWord.foreach { 16 | sentenceList => { 17 | val sentence = sentenceList.toString 18 | if (graph.getNode(sentence) == null) graph.addNode(sentence) 19 | } 20 | } 21 | 22 | // 边的获取,通过计算句子的相似度 23 | 24 | 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/textrank/ConstructTextGraph.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.textrank 2 | 3 | import org.graphstream.graph.implementations.SingleGraph 4 | 5 | import scala.collection.mutable 6 | import scala.collection.mutable.ListBuffer 7 | 8 | /** 9 | * Created by li on 16/6/23. 
10 | */ 11 | 12 | /** 13 | * 构建候选关键词图 14 | * @param graphName 图标识s 15 | * @param winSize 窗口大小 16 | * @param segWord 分词的结果 17 | * @return 候选关键词图 18 | * @author LiYu 19 | */ 20 | class ConstructTextGraph(val graphName: String, val winSize: Int, val segWord: List[String]) { 21 | 22 | /** 23 | * 构建候选关键词图 24 | * @return 候选关键词图 25 | */ 26 | def constructGraph: SingleGraph = { 27 | 28 | val graph = new SingleGraph(graphName) 29 | 30 | // 获取文本网络的节点 31 | segWord.foreach( 32 | word => if (graph.getNode(word) == null) graph.addNode(word) 33 | ) 34 | 35 | // 导入分完词的数据,并通过设置的窗口截取 36 | var wordSeg = new ListBuffer[(ListBuffer[(String)])] 37 | 38 | val num = segWord.size - winSize 39 | 40 | for (i <- 0 to num) { 41 | 42 | val item = new ListBuffer[(String)] 43 | 44 | for (j <- 0 until winSize) { 45 | 46 | item += segWord(i + j) 47 | } 48 | 49 | wordSeg += item 50 | 51 | } 52 | 53 | // 获取每个顶点以及所包含的窗口内的邻居节点 54 | val wordSet = segWord.toSet 55 | 56 | val edgeSet = wordSet.map { 57 | word => { 58 | val edgeList = new mutable.HashSet[(String)] 59 | wordSeg.foreach { 60 | list => { 61 | if (list.contains(word)){ 62 | list.foreach(x => edgeList.+=(x)) 63 | } 64 | } 65 | } 66 | 67 | (word, edgeList -= word) 68 | 69 | } 70 | } 71 | 72 | // 构建关键词图的边 73 | edgeSet.toArray.foreach { 74 | edge => { 75 | edge._2.toList.foreach { 76 | edges => 77 | 78 | if (graph.getEdge(s"${edge._1}-${edges}") == null && 79 | graph.getEdge(s"${edges}-${edge._1}") == null) { 80 | graph.addEdge(s"${edge._1}-${edges}", edge._1, edges) 81 | None 82 | } 83 | } 84 | } 85 | } 86 | 87 | graph 88 | 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/textrank/PropertyExtractor.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.textrank 2 | 3 | import org.graphstream.graph.implementations.SingleGraph 4 | import org.graphstream.graph.{Edge, Node} 5 | 6 | import scala.collection.mutable 7 | 8 | /** 9 | * Created by li on 16/6/24. 
10 | */ 11 | 12 | /** 13 | * 关键词提取, 输出个文章提取的关键词, 无向图名称为文章的url 14 | * 15 | * @param graph 节点图 16 | * @param keywordNum 关键词个数 17 | * @return 文本的关键词 18 | * @author LiYu 19 | */ 20 | class PropertyExtractor(val graph: SingleGraph, val keywordNum: Int) { 21 | 22 | /** 23 | * 24 | * @param iterator textRank迭代次数 25 | * @param df 阻尼系数(Damping Factor) 26 | * @return 关键词和得分 27 | */ 28 | // 使用textRank提取关键词 29 | def extractKeywords(iterator: Int, df: Double) = { 30 | 31 | val nodes = graph.getNodeSet.toArray.map(_.asInstanceOf[Node]) 32 | val scoreMap = new mutable.HashMap[String, Double] 33 | 34 | // 节点权重初始化 35 | nodes.foreach(node => scoreMap.put(node.getId, 1.0f)) 36 | 37 | // 迭代 迭代传播各节点的权重,直至收敛。 38 | (1 to iterator).foreach { 39 | i => 40 | nodes.foreach { 41 | node => 42 | val edges = node.getEdgeSet.toArray.map(_.asInstanceOf[Edge]) 43 | var score = 1.0f - df 44 | edges.foreach { 45 | edge => 46 | val node0 = edge.getNode0.asInstanceOf[Node] 47 | val node1 = edge.getNode1.asInstanceOf[Node] 48 | val tempNode = if (node0.getId.equals(node.getId)) node1 else node0 49 | score += df * (1.0f * scoreMap(tempNode.getId) / tempNode.getDegree) 50 | } 51 | scoreMap.put(node.getId, score) 52 | } 53 | } 54 | 55 | // 对节点权重进行倒序排序,从而得到最重要的num个单词,作为候选关键词。 56 | scoreMap.toList.sortWith(_._2 > _._2).slice(0, keywordNum) 57 | 58 | } 59 | } -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/textrank/TextRank.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.textrank 2 | 3 | /** 4 | * Created by li on 16/6/24. 5 | */ 6 | object TextRank { 7 | /** 8 | * 9 | * @param graphName 图标识 10 | * @param window 词窗口大小 11 | * @param doc 待抽取文本 12 | * @param keywordNum 提取关键词个数 13 | * @param iterator textRank迭代次数 14 | * @param df 阻尼系数 15 | * @return 关键词, 得分 16 | */ 17 | def run(graphName: String, window: Int, doc: List[String], 18 | keywordNum: Int, iterator: Int, df: Double): List[(String, Double)] = { 19 | 20 | // 生成关键词图 21 | val constructTextGraph = new ConstructTextGraph(graphName, window, doc) 22 | val textGraph = constructTextGraph.constructGraph 23 | 24 | // 输出提取的关键词 25 | val keywordExtractor = new PropertyExtractor(textGraph, keywordNum) 26 | val result = keywordExtractor.extractKeywords(iterator, df) 27 | 28 | result 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/topicmodel/LDAModel.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.topicmodel 2 | 3 | import org.apache.spark.rdd.RDD 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | //import org.apache.spark.mllib.clustering.LDA 6 | //import org.apache.spark.rdd.RDD 7 | 8 | /** 9 | * Created by li on 2016/4/28. 10 | */ 11 | 12 | object LDAModel extends App{ 13 | 14 | val conf = new SparkConf().setAppName("TopicModel").setMaster("local") 15 | val sc = new SparkContext(conf) 16 | 17 | // Load documents from text files, 1 document per file 18 | val corpus: RDD[String] = sc.wholeTextFiles("/Users/li/kunyan/docs/*.md").map(_._2) 19 | 20 | // Split each document into a sequence of terms (words) 21 | val tokenized: RDD[Array[String]] = 22 | corpus.map(_.toLowerCase.split("\\s")).map(_.filter(_.length > 3).filter(_.forall(java.lang.Character.isLetter))) 23 | 24 | tokenized.collect().foreach(println) 25 | 26 | // Choose the vocabulary. 
27 | // termCounts: Sorted list of (term, termCount) pairs 28 | val termCounts: Array[(String, Long)] = 29 | tokenized.flatMap(_.map(_ -> 1L)).reduceByKey(_ + _).collect().sortBy(-_._2) 30 | 31 | termCounts.foreach(println) 32 | 33 | // vocabArray: Chosen vocab (removing common terms) 34 | val numStopwords = 20 35 | val vocabArray: Array[String] = 36 | termCounts.takeRight(termCounts.length - numStopwords).map(_._1) 37 | 38 | // vocab: Map term -> term index 39 | val vocab: Map[String, Int] = vocabArray.zipWithIndex.toMap 40 | // vocab.foreach(println) 41 | 42 | // // Convert documents into term count vectors 43 | // val documents: RDD[(Long, Vector)] = 44 | // tokenized.zipWithIndex.map { 45 | // case (tokens, id) => 46 | // val counts = new mutable.HashMap[Int, Double]() 47 | // tokens.foreach { term => 48 | // if (vocab.contains(term)) { 49 | // val idx = vocab(term) 50 | // counts(idx) = counts.getOrElse(idx, 0.0) + 1.0 51 | // } 52 | // } 53 | // (id, Vectors.sparse(vocab.size, counts.toSeq)) 54 | // } 55 | // 56 | // documents.foreach(println) 57 | // 58 | // // Set LDA parameters 59 | // val numTopics = 3 60 | // val lda = new LDA().setK(numTopics).setMaxIterations(8) 61 | // 62 | // val ldaModel = lda.run(documents) 63 | //// val avgLogLikelihood = ldaModel.logLikelihood / documents.count() 64 | // 65 | // // Print topics, showing top-weighted 10 terms for each topic. 66 | // val topicIndices = ldaModel.describeTopics(maxTermsPerTopic = 10) 67 | // topicIndices.foreach { case (terms, termWeights) => 68 | // println("TOPIC:") 69 | // terms.zip(termWeights).foreach { case (term, weight) => 70 | // println(s"${vocabArray(term.toInt)}\t$weight") 71 | // } 72 | // println() 73 | // } 74 | // 75 | } 76 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/topicmodel/LatentDirichletAllocationExample.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.topicmodel 2 | 3 | import org.apache.spark.mllib.clustering.LDA 4 | import org.apache.spark.{SparkConf, SparkContext} 5 | import org.apache.spark.mllib.linalg.Vectors 6 | 7 | object LatentDirichletAllocationExample { 8 | 9 | def main(args: Array[String]) { 10 | 11 | val conf = new SparkConf().setAppName("LatentDirichletAllocationExample").setMaster("local") 12 | val sc = new SparkContext(conf) 13 | 14 | // $example on$ 15 | // Load and parse the data 16 | val data = sc.textFile("/Users/li/kunyan/spark/data/mllib/sample_lda_data.txt") 17 | data.foreach(println) 18 | 19 | val parsedData = data.map(s => Vectors.dense(s.trim.split(' ').map(_.toDouble))) 20 | parsedData.foreach(println) 21 | 22 | // Index documents with unique IDs 23 | val corpus = parsedData.zipWithIndex.map(_.swap).cache() 24 | 25 | // Cluster the documents into three topics using LDA 26 | val ldaModel = new LDA().setK(3).run(corpus) 27 | // 28 | // // Output topics. Each is a distribution over words (matching word count vectors) 29 | // println("Learned topics (as distributions over vocab of " + ldaModel.vocabSize + " words):") 30 | // val topics = ldaModel.topicsMatrix 31 | // for (topic <- Range(0, 3)) { 32 | // print("Topic " + topic + ":") 33 | // for (word <- Range(0, ldaModel.vocabSize)) { print(" " + topics(word, topic)); } 34 | // println() 35 | // } 36 | // 37 | // // Save and load model. 
38 | // ldaModel.save(sc, "/Users/li/kunyan/spark/LatentDirichletAllocationExample/LDAModel") 39 | // val sameModel = DistributedLDAModel.load(sc, 40 | // "/Users/li/kunyan/spark/LatentDirichletAllocationExample/LDAModel") 41 | // // $example off$ 42 | // 43 | // sc.stop() 44 | } 45 | } -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/ClassifyModel.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.word2vec 2 | 3 | import java.io.File 4 | 5 | import util.{DirectoryUtil, JSONUtil} 6 | import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD} 7 | import org.apache.spark.mllib.feature.Word2VecModel 8 | import org.apache.spark.mllib.regression.LabeledPoint 9 | import org.apache.spark.rdd.RDD 10 | import org.apache.spark.{SparkConf, SparkContext} 11 | 12 | /** 13 | * Created by li on 2016/10/13. 14 | * 15 | */ 16 | object ClassifyModel { 17 | 18 | 19 | def classify(trainDataRdd: RDD[LabeledPoint]): SVMModel = { 20 | 21 | /** NativeBayes训练模型 */ 22 | // val model = NaiveBayes.train(trainDataRdd, lambda = 1.0, modelType = "multinomial") 23 | 24 | /** SVM训练模型 */ 25 | val numIterations = 1000 26 | val model = SVMWithSGD.train(trainDataRdd , numIterations) 27 | 28 | /** RandomForest训练模型 */ 29 | // val numClasses = 2 30 | // val categoricalFeaturesInfo = Map[Int, Int]() 31 | // val numTrees = 3 32 | // val featureSubsetStrategy = "auto" 33 | // val impurity = "gini" 34 | // val maxDepth = 4 35 | // val maxBins = 32 36 | // val model = RandomForest.trainClassifier(trainDataRdd, numClasses, categoricalFeaturesInfo,numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins) 37 | 38 | /** GradientBoostedTrees训练模型 */ 39 | // // Train a GradientBoostedTrees model. 40 | // // The defaultParams for Classification use LogLoss by default. 41 | // val boostingStrategy = BoostingStrategy.defaultParams("Classification") 42 | // boostingStrategy.numIterations = 3 // Note: Use more iterations in practice. 43 | // boostingStrategy.treeStrategy.numClasses = 2 44 | // boostingStrategy.treeStrategy.maxDepth = 5 45 | // // Empty categoricalFeaturesInfo indicates all features are continuous. 
46 | // boostingStrategy.treeStrategy.categoricalFeaturesInfo = Map[Int, Int]() 47 | // 48 | // val model = GradientBoostedTrees.train(trainDataRdd, boostingStrategy) 49 | 50 | model 51 | 52 | } 53 | 54 | def main(args: Array[String]) { 55 | 56 | val conf = new SparkConf().setAppName("textVectors").setMaster("local") 57 | val sc = new SparkContext(conf) 58 | 59 | val jsonPath = "/Users/li/workshop/NaturalLanguageProcessing/src/main/scala/meachinelearning/word2vec/twc/W2VJsonConf.json" 60 | 61 | JSONUtil.initConfig(jsonPath) 62 | 63 | val word2vecModelPath = JSONUtil.getValue("w2v", "w2vmodelPath") 64 | val modelSize = JSONUtil.getValue("w2v", "w2vmodelSize").toInt 65 | val isModel = JSONUtil.getValue("w2v", "isModel").toBoolean 66 | 67 | // val word2vecModelPath = "hdfs://master:9000/home/word2vec/classifyModel-10-100-20/2016-08-16-word2VectorModel" 68 | val w2vModel = Word2VecModel.load(sc, word2vecModelPath) 69 | 70 | // 构建训练集的labeledpoint格式 71 | // val trainSetPath = "/Users/li/workshop/DataSet/trainingsetUnbalance/BXX.txt" 72 | // val trainSetPath = "/Users/li/workshop/DataSet/trainingSets/计算机" 73 | val trainSetPath = "/Users/li/workshop/DataSet/trainingSets/机械" 74 | 75 | val trainSet = DataPrepare.readData(trainSetPath) 76 | val trainSetRdd = sc.parallelize(trainSet).cache() 77 | //val trainSetRdd = sc.textFile(trainSetPath) 78 | 79 | // val trainSetVec = trainSetRdd.map( row => { 80 | // val x = row.split("\t") 81 | // (x(0), x(1).split(","))}) // 在文章进行分词的情况下,用逗号隔开 82 | // //(x(0), AnsjAnalyzer.cutNoTag(x(1)}) // 如果没有分词,就调用ansj进行分词 83 | // .map(row => (row._1.toDouble, DataPrepare.docVec(w2vModel, row._2))) 84 | 85 | val trainDataRdd = TextVectors.textVectorsWithWeight(trainSetRdd, w2vModel, modelSize, isModel).cache() 86 | 87 | val classifyModel = classify(trainDataRdd) 88 | 89 | val classifyModelPath = JSONUtil.getValue("classify", "classifymodelpath") 90 | DirectoryUtil.deleteDir(new File(classifyModelPath)) 91 | classifyModel.save(sc, classifyModelPath) 92 | println("分类模型保存完毕。") 93 | 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/ClassifyPredict.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.word2vec 2 | 3 | import org.apache.spark.mllib.classification.SVMModel 4 | import org.apache.spark.mllib.evaluation.MulticlassMetrics 5 | import org.apache.spark.mllib.feature.Word2VecModel 6 | import org.apache.spark.mllib.regression.LabeledPoint 7 | import org.apache.spark.rdd.RDD 8 | import org.apache.spark.{SparkConf, SparkContext} 9 | import util.JSONUtil 10 | import wordSegmentation.AnsjAnalyzer 11 | 12 | /** 13 | * Created by li on 2016/10/17. 
14 | */ 15 | object ClassifyPredict { 16 | 17 | 18 | /** 19 | * 准确度统计分析 20 | * 21 | * @param predictionAndLabel 22 | */ 23 | def acc(predictionAndLabel: RDD[(Double, Double)], 24 | predictDataRdd: RDD[LabeledPoint]): Unit = { 25 | 26 | //统计分类准确率 27 | val testAccuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / predictDataRdd.count() 28 | println("testAccuracy:" + testAccuracy) 29 | 30 | val metrics = new MulticlassMetrics(predictionAndLabel) 31 | println("Confusion matrix:" + metrics.confusionMatrix) 32 | 33 | // Precision by label 34 | val label = metrics.labels 35 | label.foreach { l => 36 | println(s"Precision($l) = " + metrics.precision(l)) 37 | } 38 | 39 | // Recall by label 40 | label.foreach { l => 41 | println(s"Recall($l) = " + metrics.recall(l)) 42 | } 43 | 44 | // False positive rate by label 45 | label.foreach { l => 46 | println(s"FPR($l) = " + metrics.falsePositiveRate(l)) 47 | } 48 | 49 | // F-measure by label 50 | label.foreach { l => 51 | println(s"F1-Score($l) = " + metrics.fMeasure(l)) 52 | } 53 | 54 | // val roc = metrics.roc 55 | 56 | // // AUROC 57 | // val auROC = metrics.areaUnderROC 58 | // println("Area under ROC = " + auROC) 59 | 60 | } 61 | 62 | 63 | def main(args: Array[String]) { 64 | 65 | val conf = new SparkConf().setAppName("textVectors").setMaster("local") 66 | val sc = new SparkContext(conf) 67 | 68 | val jsonPath = "/Users/li/workshop/NaturalLanguageProcessing/src/main/scala/meachinelearning/word2vec/twc/W2VJsonConf.json" 69 | 70 | JSONUtil.initConfig(jsonPath) 71 | 72 | val word2vecModelPath = JSONUtil.getValue("w2v", "w2vmodelPath") 73 | val modelSize = JSONUtil.getValue("w2v", "w2vmodelSize").toInt 74 | val isModel = JSONUtil.getValue("w2v", "isModel").toBoolean 75 | // load word2vec model 76 | val w2vModel = Word2VecModel.load(sc, word2vecModelPath) 77 | 78 | // load classify model 79 | val classifyModelPath = JSONUtil.getValue("classify", "classifymodelpath") 80 | val classifyModel = SVMModel.load(sc, classifyModelPath) 81 | 82 | // 构建测试集labeledpoint格式 83 | val predictSetPath = "/Users/li/workshop/DataSet/trainingSets/test" 84 | val predictSet = DataPrepare.readData(predictSetPath) 85 | val predictSetRdd = sc.parallelize(predictSet) 86 | 87 | // 对于单篇没有分词的文章 88 | val predictSetVec = predictSetRdd.map(row => { 89 | 1.0 + "\t" + AnsjAnalyzer.cutNoTag(row) 90 | }) 91 | val predictDataRdd = TextVectors.textVectorsWithWeight(predictSetVec, w2vModel, modelSize, isModel).cache() 92 | 93 | // val predictDataRdd = TextVectors.textVectorsWithWeight(predictSetRdd, w2vModel, modelSize, isModel).cache() 94 | 95 | /** 对测试数据集使用训练模型进行分类预测 */ 96 | // classifyModel.clearThreshold() 97 | // Compute raw scores on the test set. 
98 | val predictionAndLabel = predictDataRdd.map{ point => { 99 | val predictionFeature = classifyModel.predict(point.features) 100 | (predictionFeature, point.label) 101 | }} 102 | 103 | // 准确度统计分析 104 | acc(predictionAndLabel, predictDataRdd) 105 | //predictionAndLabel.foreach(println) 106 | sc.stop() 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/DataPrepare.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.word2vec 2 | 3 | import dataprocess.vipstockstatistic.util.AnsjAnalyzer 4 | import org.apache.spark.mllib.feature.Word2VecModel 5 | import org.apache.spark.mllib.linalg.Vectors 6 | import org.apache.spark.mllib.regression.LabeledPoint 7 | import org.apache.spark.rdd.RDD 8 | import org.apache.spark.{SparkConf, SparkContext} 9 | 10 | import scala.io.Source 11 | 12 | /** 13 | * Created by li on 2016/10/14. 14 | */ 15 | object DataPrepare { 16 | 17 | /** 18 | * 读文件 19 | * 20 | * @param filePath 文本保存的位置 21 | * @return 22 | */ 23 | def readData(filePath: String): Array[String] = { 24 | 25 | val doc = Source.fromFile(filePath).getLines().toArray 26 | 27 | doc 28 | } 29 | 30 | 31 | /** 32 | * 分词 33 | * 34 | * @param doc 35 | * @return 36 | */ 37 | def docCut(doc: Array[String]): Array[String] = { 38 | 39 | val docSeg = doc.map(x => AnsjAnalyzer.cutNoTag(x)).flatMap(x =>x) 40 | 41 | docSeg 42 | } 43 | 44 | 45 | /** 46 | * 构建文本向量 47 | * 48 | * @param word2vecModel 49 | * @param docSeg 50 | * @return 51 | */ 52 | def docVec(word2vecModel: Word2VecModel, docSeg: Array[String], modelSize: Int): Array[Double] = { 53 | 54 | val docVectors = TextVectors.textVectorsWithModel(docSeg, word2vecModel, modelSize).toArray 55 | 56 | docVectors 57 | } 58 | 59 | /** 60 | * 打标签,文本集合构建labeledPoint,集合中文章属于同一类 61 | * 62 | * @param label 63 | * @param docVec 64 | * @return 65 | */ 66 | def tagAttacheBatchSingle(label: Double, docVec: RDD[Array[Double]]): RDD[LabeledPoint] = { 67 | 68 | docVec.map{ 69 | row => 70 | LabeledPoint(label , Vectors.dense(row)) 71 | } 72 | } 73 | 74 | /** 75 | * 打标签,文本集合构建labeledPoint 76 | * 77 | * @param docVec 78 | * @return 79 | */ 80 | def tagAttacheBatchWhole(docVec: RDD[(Double, Array[Double])]): RDD[LabeledPoint] = { 81 | 82 | docVec.map{ 83 | row => 84 | LabeledPoint(row._1 , Vectors.dense(row._2)) 85 | } 86 | } 87 | 88 | 89 | /** 90 | * 打标签,单篇文本构建labeledPoint 91 | * 92 | * @param label 93 | * @param docVec 94 | * @return 95 | */ 96 | def tagAttacheSingle(label: Double, docVec: Array[Double]): LabeledPoint = { 97 | 98 | LabeledPoint(label=1.0 , Vectors.dense(docVec)) 99 | } 100 | 101 | 102 | /** 103 | * 测试代码 104 | */ 105 | def dataPrepareTest(): Unit ={ 106 | val conf = new SparkConf().setMaster("local").setAppName("DataPrepare") 107 | val sc = new SparkContext(conf) 108 | 109 | val filePath = "/Users/li/workshop/DataSet/111.txt" 110 | // val filePath = "/Users/li/workshop/DataSet/SogouC.reduced/Reduced/C000008/10.txt" 111 | 112 | val word2vecModelPath = "/Users/li/workshop/DataSet/word2vec/result/2016-07-18-15-word2VectorModel" 113 | val model = Word2VecModel.load(sc, word2vecModelPath) 114 | 115 | val data = readData(filePath) 116 | 117 | val splitData = docCut(data) 118 | 119 | val doVec = docVec(model, splitData, 100) 120 | 121 | val labeledP = tagAttacheSingle(1.0, doVec) 122 | println(labeledP) 123 | 124 | 125 | } 126 | 127 | 128 | def main(args: Array[String]) { 129 | 130 | dataPrepareTest() 131 | 132 | } 133 | 
134 | } 135 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/DeleteDirectory.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.word2vec 2 | 3 | import java.io.File 4 | 5 | /** 6 | * Created by li on 16/7/15. 7 | */ 8 | 9 | object DeleteDirectory { 10 | 11 | /** 12 | * 删除空目录 13 | * @param dir 将要删除的目录路径 14 | */ 15 | private def doDeleteEmptyDir(dir: String): Unit = { 16 | 17 | val success: Boolean = new File(dir).delete() 18 | 19 | if (success) { 20 | 21 | System.out.println("Successfully deleted empty directory: " + dir) 22 | 23 | } else { 24 | 25 | System.out.println("Failed to delete empty directory: " + dir) 26 | } 27 | } 28 | 29 | /** 30 | * 递归删除目录下的所有文件及子目录下所有文件 31 | * @param dir 将要删除的文件目录 32 | * @return boolean Returns "true" if all deletions were successful. 33 | * If a deletion fails, the method stops attempting to 34 | * delete and returns "false". 35 | */ 36 | private def deleteDir(dir: File): Boolean = { 37 | 38 | if (dir.isDirectory) { 39 | 40 | val children = dir.list() 41 | 42 | //递归删除目录中的子目录下 43 | for (i <- 0 until children.length){ 44 | 45 | val success = deleteDir(new File(dir, children(i))) 46 | 47 | if (! success){ 48 | return false 49 | } 50 | 51 | } 52 | } 53 | // 目录此时为空,可以删除 54 | dir.delete() 55 | } 56 | 57 | 58 | /** 59 | *测试 60 | */ 61 | def main(args: Array[String]): Unit = { 62 | 63 | val dir = "/Users/li/kunyan/DataSet/1111" 64 | 65 | doDeleteEmptyDir(dir) 66 | 67 | val success = deleteDir(new File(dir)) 68 | 69 | if (success) System.out.println("Successfully deleted populated directory: " + dir) 70 | 71 | else System.out.println("Failed to delete populated directory: " + dir) 72 | } 73 | 74 | } 75 | 76 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/model/data/.part-r-00000-e1c254b3-21ba-4759-b7eb-b69f39950551.gz.parquet.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/src/main/scala/meachinelearning/word2vec/model/data/.part-r-00000-e1c254b3-21ba-4759-b7eb-b69f39950551.gz.parquet.crc -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/model/data/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/src/main/scala/meachinelearning/word2vec/model/data/_SUCCESS -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/model/data/_common_metadata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/src/main/scala/meachinelearning/word2vec/model/data/_common_metadata -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/model/data/_metadata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/src/main/scala/meachinelearning/word2vec/model/data/_metadata 
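The parquet and metadata files above are the on-disk form of a saved SVMModel (the metadata below records numFeatures = 100, numClasses = 2). A minimal sketch of loading that model together with a word2vec model and classifying a single new document via the DataPrepare helpers follows; both paths and the sample text are placeholders, not values from this repository.

```scala
package meachinelearning.word2vec

import org.apache.spark.mllib.classification.SVMModel
import org.apache.spark.mllib.feature.Word2VecModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.{SparkConf, SparkContext}

object SavedModelPredictSketch {

  def main(args: Array[String]): Unit = {

    val sc = new SparkContext(new SparkConf().setAppName("SavedModelPredictSketch").setMaster("local"))

    val w2vModelPath = args(0) // placeholder: path of a 100-dimensional word2vec model
    val svmModelPath = args(1) // placeholder: the saved SVMModel directory shown above

    val w2vModel = Word2VecModel.load(sc, w2vModelPath)
    val svmModel = SVMModel.load(sc, svmModelPath)

    // segment a new document with ansj, build its 100-dim document vector, then classify it
    val words = DataPrepare.docCut(Array("待分类的新文本")) // placeholder sample text
    val docVector = DataPrepare.docVec(w2vModel, words, 100)

    println(svmModel.predict(Vectors.dense(docVector)))
    sc.stop()
  }
}
```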
-------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/model/data/part-r-00000-e1c254b3-21ba-4759-b7eb-b69f39950551.gz.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/src/main/scala/meachinelearning/word2vec/model/data/part-r-00000-e1c254b3-21ba-4759-b7eb-b69f39950551.gz.parquet -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/model/metadata/.part-00000.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/src/main/scala/meachinelearning/word2vec/model/metadata/.part-00000.crc -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/model/metadata/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/src/main/scala/meachinelearning/word2vec/model/metadata/_SUCCESS -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/model/metadata/part-00000: -------------------------------------------------------------------------------- 1 | {"class":"org.apache.spark.mllib.classification.SVMModel","version":"1.0","numFeatures":100,"numClasses":2} 2 | -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/readme.md: -------------------------------------------------------------------------------- 1 | # Classification based on word2vec 2 | 3 | ## TextRank model 4 | 5 | Extract key words from news articles with TextRank, instead of computing word vectors over every word of the article. 6 | 7 | ## Word2Vec model 8 | 9 | Build the LabeledPoint (MLlib's training input) from word2vec document vectors, instead of computing tf-idf, doing dimensionality reduction and so on. 10 | 11 | 12 | ## Classification model 13 | 14 | Use an SVM to classify.
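A minimal sketch of that flow, assuming a word2vec model has already been trained and saved with MLlib. The paths, the two toy documents and the `docVector` helper below are placeholders for illustration only; the actual implementation lives in `DataPrepare.scala`, `TextVectors` and `twc/training.scala`.

```scala
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.feature.Word2VecModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.{SparkConf, SparkContext}

import scala.util.Try

object Word2VecSvmSketch {

  def main(args: Array[String]): Unit = {

    val sc = new SparkContext(new SparkConf().setAppName("w2v-svm-sketch").setMaster("local"))

    // Placeholder path: a word2vec model previously trained and saved with MLlib.
    val w2v = Word2VecModel.load(sc, "/path/to/word2VectorModel")
    val modelSize = 100

    // Placeholder documents: (label, segmented words). In this project segmentation
    // is done with AnsjAnalyzer and labels come from the per-industry training sets.
    val docs = Seq(
      (1.0, Array("光伏", "发电", "项目")),
      (0.0, Array("婚纱", "直播", "演唱会")))

    // Average the vectors of the words found in the word2vec vocabulary;
    // fall back to a zero vector if none of them are known.
    def docVector(words: Array[String]): Array[Double] = {
      val vecs = words.flatMap(w => Try(w2v.transform(w).toArray).toOption)
      if (vecs.isEmpty) Array.fill(modelSize)(0.0)
      else vecs.reduce((a, b) => a.zip(b).map { case (x, y) => x + y }).map(_ / vecs.length)
    }

    // Attach the label and train a linear SVM (50 iterations, as in twc/training.scala).
    val data = sc.parallelize(docs.map { case (label, words) =>
      LabeledPoint(label, Vectors.dense(docVector(words)))
    })
    val model = SVMWithSGD.train(data, 50)

    // Resubstitution check only -- a real run splits into train/test sets first.
    val accuracy = data.map(p => (model.predict(p.features), p.label))
      .filter { case (pred, lab) => pred == lab }.count().toDouble / data.count()
    println(s"accuracy = $accuracy")

    sc.stop()
  }
}
```

The trained classifier can be persisted with `model.save(sc, dir)` and reloaded with `SVMModel.load(sc, dir)`; the `model/` directory above holds such a saved `SVMModel` (100 features, 2 classes).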
15 | 16 | 17 | ## Test results 18 | testAccuracy = 19 | Precision(0.0) = 20 | Precision(1.0) = 21 | Recall(0.0) = 22 | Recall(1.0) = 23 | FPR(0.0) = 24 | FPR(1.0) = 25 | F1-Score(0.0) = 26 | F1-Score(1.0) = -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/twc/W2VJsonConf.json: -------------------------------------------------------------------------------- 1 | { 2 | "kunyan": { 3 | "ip": "61.147.114.88", 4 | "port": "16003", 5 | "stopwords": "16003" 6 | }, 7 | "w2v": { 8 | "w2vmodelPath": "hdfs://61.147.114.85:9000/home/word2vec/model-10-100-20/2016-08-16-word2VectorModel", 9 | "w2vmodelSize": "100", 10 | "isModel":"true" 11 | }, 12 | "classify": { 13 | "classifymodelpath":"/Users/li/workshop/NaturalLanguageProcessing/src/main/scala/meachinelearning/word2vec/model2" 14 | } 15 | } -------------------------------------------------------------------------------- /src/main/scala/meachinelearning/word2vec/twc/training.scala: -------------------------------------------------------------------------------- 1 | package meachinelearning.word2vec.twc 2 | 3 | import org.apache.spark.mllib.classification.SVMWithSGD 4 | import org.apache.spark.mllib.evaluation.MulticlassMetrics 5 | import org.apache.spark.mllib.regression.LabeledPoint 6 | import org.apache.spark.{SparkConf, SparkContext} 7 | 8 | /** 9 | * Created by zhangxin on 16-11-9. 10 | * 11 | * 分类模型训练 12 | */ 13 | object training { 14 | 15 | 16 | def training(): Unit ={ 17 | 18 | val conf = new SparkConf().setAppName("W2V").setMaster("local") 19 | val sc = new SparkContext(conf) 20 | // val jsonPath = "/home/zhangxin/work/workplace_scala/Sentiment/src/main/scala/classificationW2V/W2VJsonConf.json" 21 | val jsonPath = "/Users/li/workshop/NaturalLanguageProcessing/src/main/scala/meachinelearning/word2vec/twc/W2VJsonConf.json" 22 | 23 | // 非平衡集 24 | // val docsPath = "/home/zhangxin/work/workplace_scala/Data/trainingsetUnbalance/JSJ.txt" 25 | // val docs = sc.textFile(docsPath).collect() 26 | 27 | // 平衡集 28 | // val docsPath = "/home/zhangxin/work/workplace_scala/Data/trainingSets/房地产" 29 | // val docsPath = "/home/zhangxin/work/workplace_scala/Data/trainingSets/有色金属" 30 | // val docsPath = "/home/zhangxin/work/workplace_scala/Data/trainingSets/保险" 31 | // val docsPath = "/home/zhangxin/work/workplace_scala/Data/trainingSets/医药" 32 | // val docsPath = "/home/zhangxin/work/workplace_scala/Data/trainingSets/计算机" 33 | val docsPath = "/Users/li/workshop/DataSet/trainingSets/工程建筑" 34 | 35 | val docs = sc.textFile(docsPath).collect() 36 | 37 | // inputs 38 | val data = processing.process_weight(docs, sc, jsonPath) 39 | println("[完成DOC2Vec模型]>>>>>>>>>>>>>>>>>") 40 | 41 | val dataRdd = sc.parallelize(data) 42 | val splits = dataRdd.randomSplit(Array(0.8, 0.2), seed = 11L) 43 | val train = splits(0) 44 | val test = splits(1) 45 | 46 | val model = SVMWithSGD.train(train, 50) 47 | // val model = LogisticRegressionWithSGD.train(train, 5000) 48 | println("[完成模型训练]>>>>>>>>>>>>>>>>>>>") 49 | 50 | 51 | val predictAndLabels = test.map{ 52 | case LabeledPoint(label, features) => 53 | val prediction = model.predict(features) 54 | (prediction, label) 55 | } 56 | 57 | val metrics = new MulticlassMetrics(predictAndLabels) 58 | println(s"[综合_Precison] ${metrics.precision}") 59 | println(s"[Labels] ${metrics.labels.toList}") 60 | metrics.labels.foreach(label => { 61 | println(s"[${label}_Precison] ${metrics.precision(label)}") 62 | }) 63 | 64 | } 65 | 66 | def main(args: Array[String]): Unit = { 67 | 
training() 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/main/scala/test/regularExpression.scala: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | /** 4 | * Created by li on 16/7/22. 5 | */ 6 | object regularExpression { 7 | 8 | def main(args: Array[String]) { 9 | 10 | val numPatten = """([0-9]+) ([a-z]+\s+)""".r 11 | 12 | // val numPatten = """(\s+[0-9]+\s+) ([0-9]+) ()""".r 13 | 14 | val res = numPatten.findAllIn("99 bottles, 89 bottles").toArray 15 | 16 | res.foreach(println) 17 | 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/util/DirectoryUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.io.File 4 | 5 | /** 6 | * Created by li on 16/7/18. 7 | * 文件夹处理工具,删除空文件夹, 删除非空文件夹及其中的文件 8 | */ 9 | object DirectoryUtil { 10 | 11 | /** 12 | * 删除空目录 13 | * 14 | * @param dir 将要删除的目录路径 15 | */ 16 | def doDeleteEmptyDir(dir: String): Unit = { 17 | 18 | val success: Boolean = new File(dir).delete() 19 | 20 | if (success) { 21 | 22 | System.out.println("Successfully deleted empty directory: " + dir) 23 | 24 | } else { 25 | 26 | System.out.println("Failed to delete empty directory: " + dir) 27 | } 28 | } 29 | 30 | /** 31 | * 递归删除目录下的所有文件及子目录下所有文件 32 | * 33 | * @param dir 将要删除的文件目录 34 | * @return boolean Returns "true" if all deletions were successful. 35 | * If a deletion fails, the method stops attempting to 36 | * delete and returns "false". 37 | */ 38 | def deleteDir(dir: File): Boolean = { 39 | 40 | if (dir.isDirectory) { 41 | 42 | val children = dir.list() 43 | 44 | //递归删除目录中的子目录下 45 | for (i <- 0 until children.length){ 46 | 47 | val success = deleteDir(new File(dir, children(i))) 48 | 49 | if (! success){ 50 | return false 51 | } 52 | 53 | } 54 | } 55 | // 目录此时为空,可以删除 56 | dir.delete() 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/main/scala/util/FileUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.io.{File, BufferedReader, FileReader, PrintWriter} 4 | 5 | import scala.collection.mutable 6 | import scala.collection.mutable.ListBuffer 7 | import scala.io.Source 8 | 9 | /** 10 | * Created by li on 2016/2/22. 11 | */ 12 | object FileUtil { 13 | 14 | /** 15 | * override the old one 16 | */ 17 | def createFile(path: String, lines: Seq[String]): Unit = { 18 | 19 | val writer = new PrintWriter(path, "UTF-8") 20 | 21 | for (line <- lines) { 22 | writer.println(line) 23 | } 24 | writer.close() 25 | } 26 | 27 | def readFile(path: String): ListBuffer[String] = { 28 | 29 | var lines = new ListBuffer[String]() 30 | 31 | val br = new BufferedReader(new FileReader(path)) 32 | try { 33 | var line = br.readLine() 34 | 35 | while (line != null) { 36 | lines += line 37 | line = br.readLine() 38 | } 39 | lines 40 | } finally { 41 | br.close() 42 | } 43 | } 44 | 45 | /** 将结果保存到本地,将每小时数据保存为一个txt文件,一天的数据保存在一个文件夹里. 
46 | * 47 | * @param dir 文件保存的目录 48 | * @param result 49 | * @author Li Yu 50 | */ 51 | def saveAsTextFile(dir: String, result: Array[(String, Double)]): Unit ={ 52 | 53 | val day = TimeUtil.getDay 54 | val hour = TimeUtil.getCurrentHour 55 | 56 | val writer = new PrintWriter(new File(dir +"%s".format(day) + "-" + "%s".format(hour) + ".txt")) 57 | 58 | for (line <- result) { 59 | 60 | writer.write(line._1 + "\t" + line._2 + "\n") 61 | 62 | } 63 | 64 | writer.close() 65 | } 66 | 67 | /** 68 | * 读取当前时间前一个小时的数据,读取本地文件中的结果. 69 | * 70 | * @param dir 数据保存的目录 71 | * @return 72 | */ 73 | def readFromFile(dir: String): Array[(String, Double)] ={ 74 | 75 | val date = TimeUtil.getPreHourStr 76 | 77 | val temp = Source.fromFile(dir + "%s".format(date) + ".txt" ) 78 | 79 | val res = new mutable.ArrayBuffer[(String, Double)] 80 | temp.getLines().foreach( 81 | line =>{ 82 | val temp = line.split("\t") 83 | res.+=((temp(0), temp(1).toDouble)) 84 | } 85 | ) 86 | res.toArray 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/scala/util/JsonUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import org.json.JSONObject 4 | 5 | import scala.util.parsing.json.JSON 6 | import scala.io.Source 7 | 8 | 9 | /** 10 | * Created by li on 16/8/29. 11 | * 读取json格式的额配置文件信息. 12 | */ 13 | object JSONUtil { 14 | 15 | private var config = new JSONObject() 16 | 17 | /** 18 | * 初始化类 19 | * 20 | * @param confDir 配置文件路径 21 | */ 22 | def initConfig(confDir: String): Unit = { 23 | 24 | val jsObj = Source.fromFile(confDir).getLines().mkString("") 25 | config = new JSONObject(jsObj) 26 | } 27 | 28 | 29 | private def readConfigFile(confDir: String): Map[String, Any] = { 30 | 31 | val jsonFile = Source.fromFile(confDir).mkString 32 | 33 | val json = JSON.parseFull(jsonFile) 34 | 35 | json match { 36 | 37 | case Some(map: Map[String, Any]) => map 38 | // case None => println("Parsing failed") 39 | // case other => println("Unknown data structure: " + other) 40 | } 41 | 42 | } 43 | 44 | /** 45 | * 获取配置文件中的相应的值 46 | * @param key1 定位key 47 | * @param key2 定位key 48 | * @return 返回字符串 49 | */ 50 | def getValue(key1: String, key2: String): String = { 51 | 52 | config.getJSONObject(key1).getString(key2) 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/util/LoggerUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import org.apache.log4j.{BasicConfigurator, Logger} 4 | 5 | /** 6 | * 写Log操作 7 | */ 8 | object LoggerUtil { 9 | 10 | var logger = Logger.getLogger("word2vector") 11 | BasicConfigurator.configure() 12 | // PropertyConfigurator.configure("/home/mlearning/tdt/conf/log4j.properties") 13 | 14 | def exception(e: Exception) = { 15 | 16 | logger.error(e.printStackTrace()) 17 | 18 | } 19 | 20 | def error(msg: String): Unit = { 21 | 22 | logger.error(msg) 23 | } 24 | 25 | def warn(msg: String): Unit = { 26 | 27 | logger.warn(msg) 28 | } 29 | 30 | def info(msg: String): Unit = { 31 | 32 | logger.info(msg) 33 | } 34 | 35 | def debug(msg: String): Unit = { 36 | 37 | logger.debug(msg) 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/util/MySQLUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.sql.{Connection, DriverManager, 
PreparedStatement, ResultSet} 4 | 5 | import scala.collection.mutable.ArrayBuffer 6 | import scala.xml.Elem 7 | 8 | /** 9 | * Created by li on 16/7/12. 10 | */ 11 | object MySQLUtil { 12 | 13 | /** 14 | * 读取配置文件中的内容,并建立连接 15 | * 16 | * @param configFile 配置文件 17 | * @return 18 | */ 19 | def getConnect(configFile: Elem): Connection = { 20 | 21 | //写在配置文件中 22 | val url = (configFile \ "mysql" \ "url" ).text 23 | val userName = (configFile \ "mysql" \ "username").text 24 | val password = (configFile \ "mysql" \ "password").text 25 | 26 | //设置驱动 27 | Class.forName("com.mysql.jdbc.Driver") 28 | 29 | //初始化 30 | val conn = DriverManager.getConnection(url, userName, password) 31 | 32 | conn 33 | } 34 | 35 | /** 36 | * 向mysql中写数据 37 | * 38 | * @param configFile 配置文件 39 | * @param data 需要写进数据库里面的数据 40 | * @param sql sql查询语句, 格式(sql = "INSERT INTO quotes (quote, author) VALUES (?, ?)") 41 | */ 42 | def write2Mysql(configFile: Elem, data: Iterator[String], sql: String): Unit ={ 43 | 44 | var conn: Connection = null 45 | var prep: PreparedStatement = null 46 | 47 | try{ 48 | 49 | // 读取配置文件并建立连接 50 | conn = getConnect(configFile) 51 | 52 | /** 对需要写入的内容(data)的每一行进行操作 */ 53 | data.foreach{ line => { 54 | 55 | val temp = line.split(",") 56 | 57 | /** sql插入语句: */ 58 | prep = conn.prepareStatement(sql) 59 | prep.setString(1, temp(0)) 60 | prep.setString(2, temp(1)) 61 | 62 | prep.executeUpdate() 63 | }} 64 | } catch { 65 | 66 | case e: Exception => println("Mysql Exception") 67 | } finally { 68 | 69 | if(conn != null) { 70 | 71 | conn.close() 72 | } 73 | 74 | if(prep != null) { 75 | 76 | prep.close() 77 | } 78 | } 79 | } 80 | 81 | /** 82 | * 从mysql中读取数据 83 | * 84 | * @param configFile 配置文件 85 | * @param sql mysql查询语句 86 | */ 87 | def readFromMysql(configFile: Elem, sql: String): Array[(String, String)] = { 88 | 89 | var conn: Connection = null 90 | 91 | try { 92 | 93 | // 读取配置文件并建立连接 94 | conn = getConnect(configFile) 95 | 96 | val statement = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_UPDATABLE) 97 | // 通过sql查询语句查询的结果 98 | // val sql = "select symbol, sename from bt_stcode where (EXCHANGE = '001002' or EXCHANGE = '001003') and SETYPE = '101' and CUR = 'CNY' and ISVALID = 1 and LISTSTATUS <> '2'" 99 | val result = statement.executeQuery(sql) 100 | 101 | val stocks = ArrayBuffer[(String, String)]() 102 | while(result.next()) { 103 | 104 | /** todo 对查询的结果进行操作 */ 105 | val stockID = result.getString("symbol") // symbol: row name 106 | val stock = stockID + "," + result.getString("sename") // sename: row name 107 | stocks +=((stockID, stock)) 108 | } 109 | 110 | stocks.toArray 111 | } catch { 112 | 113 | case e: Exception => Array(("error", "error")) 114 | } finally { 115 | 116 | conn.close() 117 | } 118 | } 119 | 120 | } 121 | -------------------------------------------------------------------------------- /src/main/scala/util/RedisUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import redis.clients.jedis.Jedis 4 | 5 | import scala.collection.mutable 6 | import scala.xml.Elem 7 | 8 | /** 9 | * Created by li on 16/7/8. 
10 | */ 11 | object RedisUtil { 12 | 13 | var jedis: Jedis = null 14 | /** 15 | * 初始化 redis 16 | * 17 | * @param configFile 配置文件对应的 xml 对象 18 | * @note rowNum: 10 19 | */ 20 | def initRedis(configFile: Elem): Jedis = { 21 | 22 | val redisIp = (configFile \ "redis" \ "ip").text 23 | val redisPort = (configFile \ "redis" \ "port").text.toInt 24 | val redisDB = (configFile \ "redis" \ "db").text.toInt 25 | val redisAuth = (configFile \ "redis" \ "auth").text 26 | 27 | jedis = new Jedis(redisIp, redisPort) 28 | jedis.auth(redisAuth) 29 | jedis.select(redisDB) 30 | 31 | jedis 32 | } 33 | 34 | /** 35 | * 36 | */ 37 | def readFromRedis: Unit ={ 38 | 39 | } 40 | 41 | /** 42 | * 将结果保存到redis 43 | * 44 | * @param resultData 需要保存的数据, hset格式 45 | * @author LiYu 46 | * @note rowNum: 12 47 | */ 48 | def write2RedisWithHset(resultData: Array[(String, String)], time: String, dataType: String): Unit = { 49 | 50 | val resultDataMap = mutable.HashMap[String, String]() 51 | 52 | resultData.foreach{line => { 53 | resultDataMap.put(line._1, line._2) 54 | }} 55 | 56 | val pipeline = jedis.pipelined() 57 | 58 | resultDataMap.toSeq.foreach{ x => { 59 | 60 | pipeline.hset(s"vipstockstatistic_$dataType" + s"_$time", x._1, x._2) 61 | // pipeline.expire("hotwordsrank_test:", 60 * 60 * 12) 62 | }} 63 | 64 | pipeline.sync() 65 | } 66 | 67 | /** 68 | * 将结果保存到redis 69 | * 70 | * @param resultData 需要保存的数据,zet格式 71 | * @author Li Yu 72 | * @note rowNum: 12 73 | */ 74 | def write2RedisWithZset(resultData: Array[(String, String)], time: String, dataType: String, jedis: Jedis): Unit = { 75 | 76 | resultData.foreach{x => { 77 | 78 | jedis.zadd(s"vipstockstatistic_$dataType" + s"_$time", x._2.toDouble, x._1) 79 | }} 80 | 81 | jedis.close() 82 | } 83 | 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/main/scala/util/TextProcessing.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | 4 | /** 5 | * Created by li on 16/4/11. 
6 | */ 7 | object TextProcessing { 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/main/scala/util/TimeUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.math.BigInteger 4 | import java.text.SimpleDateFormat 5 | import java.util.{Calendar, Date} 6 | 7 | import org.apache.hadoop.hbase.client.Scan 8 | import org.apache.hadoop.hbase.protobuf.ProtobufUtil 9 | import org.apache.hadoop.hbase.protobuf.generated.ClientProtos 10 | import org.apache.hadoop.hbase.util.Base64 11 | 12 | /** 13 | * 格式化时间的工具类 14 | */ 15 | object TimeUtil { 16 | 17 | 18 | /** 19 | * 获取时间戳对应的时间 20 | * @param timeStamp 时间戳 21 | * @return 22 | */ 23 | def getTime(timeStamp: String): String = { 24 | val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd_HH:mm:ss") 25 | val bigInt: BigInteger = new BigInteger(timeStamp) 26 | val date: String = sdf.format(bigInt) 27 | date 28 | } 29 | 30 | /** 31 | * 获取当前时间,并转换成制定的格式 32 | * @return 33 | */ 34 | def getDay: String = { 35 | val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd") 36 | val date: String = sdf.format(new Date) 37 | date 38 | } 39 | 40 | /** 41 | * 获取当前小时 42 | * @return 43 | */ 44 | def getCurrentHour: Int = { 45 | val calendar = Calendar.getInstance 46 | calendar.setTime(new Date) 47 | calendar.get(Calendar.HOUR_OF_DAY) 48 | } 49 | 50 | /** 51 | * 获取当前小时的前一个小时 52 | * @return 53 | */ 54 | def getPreHourStr: String = { 55 | val date = new Date(new Date().getTime - 60 * 60 * 1000) 56 | val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd-HH") 57 | sdf.format(date) 58 | } 59 | 60 | /** 61 | * 获取今天的日期 62 | * 63 | * @return 64 | */ 65 | def getNowDate(): String = { 66 | val now: Date = new Date() 67 | val dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd") 68 | val res = dateFormat.format(now) 69 | res 70 | } 71 | 72 | 73 | /** 74 | * 获取本周的开始时间 75 | */ 76 | def Null(){ 77 | 78 | } 79 | 80 | /** 81 | * 获取本月的开始时间 82 | * http://blog.csdn.net/springlustre/article/details/47273353 83 | */ 84 | 85 | 86 | /** 87 | * 设置时间范围 88 | * 89 | * @return 时间范围 90 | * @author 91 | */ 92 | def setTimeRange(): String = { 93 | 94 | val scan = new Scan() 95 | val date = new Date(new Date().getTime - 30 * 24 * 60 * 60 * 1000) 96 | val format = new SimpleDateFormat("yyyy-MM-dd HH") 97 | val time = format.format(date) 98 | val time1 = format.format(new Date().getTime) 99 | val startTime = time + "-00-00" 100 | val stopTime = time1 + "-00-00" 101 | val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd HH-mm-ss") 102 | val startRow: Long = sdf.parse(startTime).getTime 103 | val stopRow: Long = sdf.parse(stopTime).getTime 104 | 105 | scan.setTimeRange(startRow, stopRow) 106 | val proto: ClientProtos.Scan = ProtobufUtil.toScan(scan) 107 | 108 | Base64.encodeBytes(proto.toByteArray) 109 | } 110 | 111 | /** 112 | * 设置指定的时间范围(一天) 113 | * @param time 指定的日期 114 | * @return 指定日期至前一天时间范围 115 | */ 116 | def setAssignedTimeRange(time: String): String = { 117 | 118 | val format = new SimpleDateFormat("yyyy-MM-dd") 119 | 120 | val date = format.parse(time) 121 | 122 | val endTime = new Date(date.getTime - 24 * 60 * 60 * 1000) 123 | 124 | val stopTime = format.format(endTime) 125 | 126 | val startDate = time + "-00-00-00" 127 | val stopDate = stopTime + "-00-00-00" 128 | 129 | val sdf = new SimpleDateFormat("yyyy-MM-dd HH-mm-ss") 130 | val startRaw = sdf.parse(startDate).getTime 131 | val stopRaw = sdf.parse(stopDate).getTime 132 | 133 | val 
scan = new Scan() 134 | scan.setTimeRange(startRaw, stopRaw) 135 | 136 | val proto = ProtobufUtil.toScan(scan) 137 | 138 | Base64.encodeBytes(proto.toByteArray) 139 | } 140 | 141 | 142 | } 143 | -------------------------------------------------------------------------------- /src/main/scala/util/UrlCategoryTrim.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import java.io.{BufferedWriter, File, FileWriter} 4 | 5 | import org.apache.spark.{SparkConf, SparkContext} 6 | 7 | import scala.collection.mutable.ListBuffer 8 | import scala.io.Source 9 | 10 | /** 11 | * Created by li on 16/4/6. 12 | * 所输入的数据中有的一个url会对应多个catagory,将具有相同URL的catagory单独分隔开,变成一一对应的值 13 | */ 14 | object UrlCategoryTrim { 15 | 16 | // 判断如果catagory中有多个的将其分开并与url对应 17 | def splitCategory(tuple:(String,String)): ListBuffer[(String)] = { 18 | 19 | val listBuffer = new ListBuffer[(String)] 20 | val cata = tuple._1.split(",") 21 | 22 | if(cata.length < 1){ 23 | 24 | listBuffer.+=(tuple._2 + "\t" + tuple._1) 25 | } else { 26 | 27 | for(item <- cata){ 28 | 29 | listBuffer.+=(tuple._2+ "\t" +item) 30 | } 31 | } 32 | listBuffer 33 | } 34 | 35 | def main(args: Array[String]) { 36 | val conf = new SparkConf().setAppName("urlCatagoryTrim").setMaster("local") 37 | val sc = new SparkContext(conf) 38 | 39 | 40 | val data = Source.fromFile("/Users/li/Downloads/trainingLabel(0).new").getLines().toArray.map{ 41 | line => 42 | val tmp = line.split("\t") 43 | (tmp(1), tmp(0)) 44 | 45 | } 46 | 47 | // data.flatMap(splitCatagory).foreach(println) 48 | // 保存到文件中 49 | val dataFile = new File("/users/li/Downloads/trainglabel3.txt") 50 | val fileWriter = new FileWriter(dataFile) 51 | val bufferWriter = new BufferedWriter(fileWriter) 52 | 53 | data.flatMap(x => splitCategory(x)).foreach ( 54 | line => 55 | bufferWriter.write(line + "\n") 56 | ) 57 | 58 | bufferWriter.flush() 59 | bufferWriter.close() 60 | 61 | } 62 | 63 | 64 | 65 | } 66 | 67 | 68 | -------------------------------------------------------------------------------- /src/main/scala/util/XMLUtil.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import scala.xml.{XML, Elem} 4 | 5 | /** 6 | * Created by li on 16/8/29. 7 | */ 8 | object XMLUtil { 9 | 10 | /** 11 | * 获取xml格式的配置文件 12 | * 13 | * @param dir 配置文件所在的文件目录 14 | * @return 15 | */ 16 | def readConfigFile(dir: String): Elem = { 17 | 18 | val configFile = XML.loadFile(dir) 19 | 20 | configFile 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/util/regularExpression.scala: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | 5 | import scala.collection.mutable.ListBuffer 6 | import scala.util.matching.Regex 7 | 8 | /** 9 | * Created by li on 16/6/20. 10 | * 正则表达式,读取文本中所有双引号里面的内容. 
11 | */ 12 | object regularExpression extends App{ 13 | val conf = new SparkConf().setMaster("local").setAppName("regularexpression") 14 | val sc = new SparkContext(conf) 15 | 16 | val data = sc.textFile("file:/Users/li/kunyan/111.txt") 17 | 18 | def quotationMatch(sentence:String): Array[String] = { 19 | 20 | // val regex = new Regex("\"([^\"]*)\"") //匹配双引号 21 | // val regex = new Regex("(?<=\").{1,}(?=\")") //匹配双引号 22 | val regex = new Regex("([-+]?\\d+(\\.\\d+)?%)|[-+]?\\d+(\\.\\d+)?")//匹配正(负)整数\浮点数\含有百分号的数 23 | 24 | // val regex = "\"([^\"]*)\"".r 25 | val num = regex.findAllIn(sentence) 26 | val res = new ListBuffer[String] 27 | while(num.hasNext){ 28 | val item = num.next() 29 | res += item.replaceAll("\"", "") 30 | } 31 | res.toArray 32 | } 33 | 34 | // val res = quotationMatch(data) 35 | data.foreach { 36 | 37 | x =>{ 38 | val res = quotationMatch(x) 39 | res.foreach(println) 40 | } 41 | } 42 | 43 | 44 | 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/wordSegmentation/AnsjAnalyzer.scala: -------------------------------------------------------------------------------- 1 | package wordSegmentation 2 | 3 | import org.ansj.domain.Term 4 | import org.ansj.library.UserDefineLibrary 5 | import org.ansj.splitWord.analysis.{NlpAnalysis, ToAnalysis} 6 | import org.apache.spark.SparkContext 7 | import org.nlpcn.commons.lang.tire.domain.Value 8 | import org.nlpcn.commons.lang.tire.library.Library 9 | 10 | /** 11 | * Created by zhangxin on 2016/3/8 12 | * 基于ansj的分词工具 13 | */ 14 | object AnsjAnalyzer { 15 | 16 | /** 17 | * ansj分词器初始化, 添加用户词典 18 | * 19 | * @param sc spark程序入口 20 | * @param userDic 用户词典数组 21 | * @return 无 22 | * @author zhangxin 23 | */ 24 | def init(sc: SparkContext, userDic: Array[String]): Unit = { 25 | 26 | val forest = Library.makeForest("library/default.dic") 27 | // val forest = new Forest() 28 | 29 | if(userDic != null ){ 30 | userDic.foreach(addUserDic(_, sc)) 31 | } 32 | 33 | } 34 | 35 | /** 36 | * 添加用户词典到分词器 37 | * 38 | * @param dicPath 词典路径 39 | * @param sc spark程序入口 40 | * @return 无 41 | * @author zhangxin 42 | */ 43 | def addUserDic(dicPath: String, sc: SparkContext): Unit = { 44 | 45 | //读取词典 46 | val dic = sc.textFile(dicPath).collect() 47 | 48 | //添加到ansj中 49 | dic.foreach(UserDefineLibrary.insertWord(_, "userDefine", 100)) 50 | 51 | 52 | } 53 | 54 | /** 55 | * 标准分词 ,无词性标注 56 | * 57 | * @param sentence 待分词语句 58 | * @return 分词结果 59 | * @author zhangxin 60 | */ 61 | def cutNoTag(sentence: String): Array[String] = { 62 | 63 | // 添加词典,这边有问题,还需继续研究 64 | val value = new Value("济南 \tn") 65 | 66 | Library.insertWord(UserDefineLibrary.ambiguityForest, value) 67 | 68 | //切词 69 | val sent = ToAnalysis.parse(sentence) 70 | 71 | //提取分词结果,过滤词性 72 | val words = for(i <- Range(0, sent.size())) yield sent.get(i).getName 73 | 74 | words.toArray 75 | } 76 | 77 | /** 78 | * 自然语言分词,带词性标注 79 | * 80 | * @param sentence 待分词句子 81 | * @return 分词结果 82 | * @author zhangxin 83 | */ 84 | def cutWithTag(sentence: String): Array[Term] = { 85 | 86 | // 切词 87 | val sent = NlpAnalysis.parse(sentence) 88 | 89 | // 提取分词结果 90 | val words = for(i <- Range(0, sent.size())) yield sent.get(i).next() 91 | 92 | words.toArray 93 | } 94 | 95 | 96 | /** 97 | * 标准分词 ,无词性标注 98 | * 99 | * @param sentence 待分词语句 100 | * @return 分词结果 101 | */ 102 | def cutTag(sentence: String, option: Int): Array[String] = { 103 | 104 | val value = new Value("济南\tn") 105 | 106 | Library.insertWord(UserDefineLibrary.ambiguityForest, value) 107 | 108 | //切词 109 | val 
sent = ToAnalysis.parse(sentence) 110 | 111 | option match { 112 | case 0 => { 113 | 114 | //提取分词结果,过滤词性 115 | val words = for(i <- Range(0, sent.size())) yield sent.get(i).getName 116 | 117 | words.toArray 118 | } 119 | 120 | case 1 => { 121 | 122 | // 提取分词结果 123 | val words = for(i <- Range(0, sent.size())) yield sent.get(i).getName 124 | 125 | words.toArray 126 | } 127 | } 128 | } 129 | 130 | } 131 | -------------------------------------------------------------------------------- /src/main/scala/wordSegmentation/wordSegmentAnalyser.scala: -------------------------------------------------------------------------------- 1 | package wordSegmentation 2 | 3 | 4 | /** 5 | * Created by li on 16/8/29. 6 | * 调用ansj分词系统 7 | */ 8 | object wordSegmentAnalyser { 9 | 10 | val content = "我是中国人,我经济南下车到广州。中国经济南下势头迅猛!" 11 | 12 | def sentenceSegment(content: String): Array[Array[String]] = { 13 | 14 | // 文章切分为句子 15 | val sentenceArr = content.split(",|。|\t|\n|,|:") 16 | // 句子分词 17 | val segResult = sentenceArr.map(AnsjAnalyzer.cutNoTag) 18 | 19 | segResult.foreach(x => { 20 | 21 | x.foreach(x => print(x + "| ")) 22 | println() 23 | }) 24 | 25 | segResult 26 | } 27 | 28 | 29 | // def isElem(sentence: Array[String], candidate: Array[String]): Boolean = { 30 | // 31 | // candidate.map{ line => { 32 | // 33 | // if(sentence.contains(line)) { 34 | // 35 | // return true 36 | // 37 | // } else { 38 | // 39 | // return false 40 | // } 41 | // }} 42 | // 43 | // } 44 | // 45 | // def identify(sentenceSeg: Array[Array[String]], 46 | // candidateDic: (String, Array[String])): Array[(Array[String], Array[String])] = { 47 | // 48 | // sentenceSeg.map{line => { 49 | // if (isElem(line, candidateDic._2)){ 50 | // 51 | // (line, candidateDic._1) 52 | // } else { 53 | // (line, "0") 54 | // } 55 | // }} 56 | // 57 | // } 58 | 59 | 60 | def main(args: Array[String]) { 61 | 62 | //每个句子分词 63 | 64 | sentenceSegment(content) 65 | 66 | //匹配窗口设定 67 | 68 | 69 | //名词提出 70 | 71 | 72 | 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/test/resources/2016-07-11-15.txt: -------------------------------------------------------------------------------- 1 | 好男儿 2.313289243522607 2 | 太正宵 0.7779809171400112 3 | 婚纱 0.7515285506073754 4 | 俞思远 0.6920085439132682 5 | 董文华 0.6858591525761419 6 | 直播 0.5917747547425979 7 | 六合彩 0.5647028809538401 8 | 李宇春 0.5534632615609104 9 | 男同志 0.43542120073545265 10 | 演唱会 0.415335092651389 11 | 无耻 0.4137490483483452 12 | 敢死队 0.2982491500059149 -------------------------------------------------------------------------------- /src/test/resources/2016-07-12-13.txt: -------------------------------------------------------------------------------- 1 | t1 1.0564168992636667 2 | t3 0.9591311372616367 3 | t2 0.6064584948059578 4 | 敢死队 0.03318315879765851 5 | 好男儿 0.028594324469757446 6 | -------------------------------------------------------------------------------- /src/test/resources/2016-07-12-15.txt: -------------------------------------------------------------------------------- 1 | 好男儿 0.313289243522607 2 | 太正宵 1.7779809171400112 3 | 婚纱 0.7515285506073754 4 | 俞思远 2.6920085439132682 5 | 董文华 0.6858591525761419 6 | 直播 0.5917747547425979 7 | 六合彩 0.5647028809538401 8 | 李宇春 0.5534632615609104 9 | 男同志 0.43542120073545265 10 | 演唱会 0.415335092651389 11 | 无耻 0.4137490483483452 12 | 敢死队 0.2982491500059149 -------------------------------------------------------------------------------- /src/test/resources/2016-07-12-16.txt: 
-------------------------------------------------------------------------------- 1 | t3 1.0564168992636667 2 | t1 0.9591311372616367 3 | t2 0.6064584948059578 4 | 敢死队 0.03318315879765851 5 | 好男儿 0.028594324469757446 6 | -------------------------------------------------------------------------------- /src/test/resources/text/1.txt: -------------------------------------------------------------------------------- 1 | 光伏,中国人民银行,列,入,绿色,债券,支援,专案,目录,2015年12月22日,19:00:00,中国人民银行,发布,2015,第39,号,公告,公告,称为,加快,建设生态文明,引导,金融机构,服务,绿色发展,推动,经济结构转型,升级,经济发展方式转变,支援,金融机构,发行,绿色,金融债券,募集资金,支援,绿色,产业发展,笔者,目录,第5,项,清洁能源,发电,中,风力发电,光伏发电,智慧,电网,能源,因特网,分布式能源,太阳能热利用,水力发电,新能源,利用,列,入,太阳能光伏发电站,太阳能,高,温热,发电站,不含,分布式,太阳能光伏发电,系统,需,限定,条件,多晶硅,电池,组件,光电,转化,效率,≥,15.5%,组件,专案,投产,运行,日,一年,衰减率,≤,2.5%,年,衰减率,≤,0.7%,单晶硅,电池,组件,光电,转化,效率,≥,16%,组件,专案,投产,运行,日,一年,衰减率,≤,3%,年,衰减率,≤,0.7%,高,倍,聚光光伏,组件,光电,转化,效率,≥,28%,项目,投产,运行,日,一年,衰减率,≤,2%,年,衰减率,≤,0.5%,项目全生命周期,衰减率,≤,10%,硅基,薄膜电池,组件,光电,转化,效率,≥,8%,铜铟镓硒,CIGS,薄膜电池,组件,光电,转化,效率,≥,11%,碲化镉,CdTe,薄膜电池,组件,光电,转化,效率,≥,11%,薄膜电池,组件,光电,转化,效率,≥,10%,多晶硅,单晶硅,薄膜电池,项目全生命周期,衰减率,≤,20%,智能电网,能源,因特网,指,提高,供,需,负荷,平衡,回应,能力,改善,电网,综合,能效,降低,输变电,损耗,增强,可再生能源,接,入,能力,电网建设,运营,技术,升级,改造,专案,1.,智能电网,指,采用,智慧,型,电气设备,即时,双向,集成,通信技术,先进技术,电网建设,运营,专案,电网,智慧,化,升级,改造,项目,2.,能源,因特网,指,综合,电力电子,资讯,智慧,管理技术,连接,分布式能源,含,分布式,可再生能源,分布式,储能,装置,类型,负荷,能量,双向,流动,交换,共享,电网,微电网,能源,燃气,网络,设施,建设,运营,专案,分布式能源,指,区域,能源站,包括,天然气,区域,能源站,分布式光伏发电,系统,分布式能源,设施,建设,运营,分布式能源,接,入,峰谷,调节,系统,分布式,电力,交易平台,能源管理系统,建设,运营,附,中国人民银行公告,2015,第39,号,绿色,债券,支援,专案,目录 -------------------------------------------------------------------------------- /src/test/resources/text/2.txt: -------------------------------------------------------------------------------- 1 | 记者,国家电网公司,获悉,9月23日,河北丰宁,二期,山东文登,重庆,蟠龙,抽水蓄能电站,工程,以下简称,丰宁,二期,文登,蟠龙,抽,蓄,座,抽,蓄,电站,正式,开工,总投资,244.4亿,元,总装机容量,480万,千瓦,计划,2022年,竣工,投产,项目,预计,增加,发电,装备制造业,产值,111亿,元,推动,相关,装备制造业,发展,开工,动员大会,国家电网公司,董事长,党组书记,刘振亚,丰宁,二期,文登,蟠龙,抽,蓄,国家电网公司,推进,特高压电网,建设,服务,清洁能源,发展,重大工程,继,2015年6月,安徽金寨,山东沂蒙,河南,天池,座,抽水蓄能电站,第二批,开工,电站,标志,我国,抽水蓄能电站,加快,发展,新,阶段,介绍,河北丰宁,二期,抽水蓄能电站,项目,位于,河北省承德市,丰宁县,装机容量,180万,千瓦,安装,台,30万,千瓦,可逆,式,水轮发电机组,500,千伏,电压,接,入,华北电网,工程投资,87.5亿,元,丰宁抽水蓄能电站,一期,二期,装机容量,360万,千瓦,世界上,装机容量,抽水蓄能电站,山东,文登抽水蓄能电站,位于,山东省,威海市文登区,装机容量,180万,千瓦,安装,台,30万,千瓦,可逆,式,水轮发电机组,500,千伏,电压,接,入,山东电网,工程投资,85.7亿,元,重庆,蟠龙,抽水蓄能电站,位于,重庆市綦江区,装机容量,120万,千瓦,安装,台,30万,千瓦,可逆,式,水轮发电机组,500,千伏,电压,接,入,重庆电网,工程投资,71.2亿,元,国网,座,受,端,电网,地区,抽水蓄能电站,建成,更好地,接纳,区,外,来电,优化,电源,结构,提高,北,西南,地区,清洁能源,消纳,能力,提高,特高压电网,系统安全,可靠性,综合,煤电,机组,消纳,清洁能源,效果,建设,丰宁,二期,文登,蟠龙,抽,蓄,年,节约,原煤,消耗,291万,吨,减排,烟尘,0.3万,吨,二氧化硫,1.4万,吨,氮氧化物,1.3万,吨,二氧化碳,485万,吨,节能减排,大气污染防治,国家电网公司,经营,区域,内在,运,抽水蓄能电站,装机容量,1674.5万,千瓦,建,规模,1880万,千瓦,预计,2017年,我国,抽水蓄能,装机,3300万,千瓦,超过,美国,世界上,抽水蓄能电站,第一,大国 -------------------------------------------------------------------------------- /src/test/resources/text/abstract: -------------------------------------------------------------------------------- 1 | 算法可大致分为基本算法、数据结构的算法、数论算法、计算几何的算法、图的算法、动态规划以及数值分析、加密算法、排序算法、检索算法、随机化算法、并行算法、厄米变形模型、随机森林算法。 2 | 算法可以宽泛的分为三类, 3 | 一,有限的确定性算法,这类算法在有限的一段时间内终止。他们可能要花很长时间来执行指定的任务,但仍将在一定的时间内终止。这类算法得出的结果常取决于输入值。 4 | 二,有限的非确定算法,这类算法在有限的时间内终止。然而,对于一个(或一些)给定的数值,算法的结果并不是唯一的或确定的。 5 | 三,无限的算法,是那些由于没有定义终止定义条件,或定义的条件无法由输入的数据满足而不终止运行的算法。通常,无限算法的产生是由于未能确定的定义终止条件。 -------------------------------------------------------------------------------- /src/test/scala/CNNTest.scala: -------------------------------------------------------------------------------- 1 | import breeze.linalg.{DenseMatrix, DenseVector} 2 | 3 | //import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, DenseVector => BDV, Matrix => BM, SparseVector => BSV, Vector => BV, 
accumulate => Accumulate, axpy => brzAxpy, rot90 => Rot90, sum => Bsum, svd => brzSvd, DenseVector} 4 | //import breeze.numerics.{exp => Bexp, tanh => Btanh} 5 | //import org.apache.spark.mllib.linalg.DenseMatrix 6 | 7 | 8 | /** 9 | * Created by li on 16/8/15. 10 | */ 11 | object CNNTest { 12 | 13 | 14 | def main(args: Array[String]) { 15 | // 16 | // def sigm(matrix: BDM[Double]): BDM[Double] = { 17 | // val s1 = 1.0 / (Bexp(matrix * (-1.0)) + 1.0) 18 | // s1 19 | // } 20 | // 21 | // val result = BDM.ones[Double](2, 3) + 1.8 22 | 23 | 24 | 25 | val a = DenseVector(1.0, 2.0, 3.0, 4.0, 5.0) 26 | 27 | val b = DenseVector(1.0, 2.0, 3.0, 4.0, 5.0) 28 | 29 | val c = DenseMatrix.ones[Double](5, 2) 30 | 31 | val d = DenseMatrix.ones[Double](5, 5) 32 | 33 | println((a.toDenseMatrix :* d)) 34 | 35 | 36 | // val c = (a :* b) :* d 37 | // 38 | // println(c) 39 | 40 | 41 | 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/scala/HDFSUtilTest.scala: -------------------------------------------------------------------------------- 1 | import util.HDFSUtil 2 | 3 | import scala.xml.XML 4 | 5 | /** 6 | * Created by li on 16/7/25. 7 | */ 8 | object HDFSUtilTest { 9 | 10 | def main(args: Array[String]) { 11 | 12 | val configFile = XML.loadFile("/Users/li/Kunyan/NaturalLanguageProcessing/src/main/scala/util/config.xml") 13 | 14 | val filesystem = HDFSUtil.setHdfsConfigure(configFile) 15 | 16 | } 17 | 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/test/scala/JSONUtilTest.scala: -------------------------------------------------------------------------------- 1 | import util.JSONUtil 2 | 3 | /** 4 | * Created by li on 16/8/29. 5 | */ 6 | object JSONUtilTest { 7 | 8 | 9 | def main(args: Array[String]) { 10 | 11 | val confDir = "/Users/li/Kunyan/NaturalLanguageProcessing/src/main/resources/jsonConfig.json" 12 | 13 | JSONUtil.initConfig(confDir) 14 | 15 | val res = JSONUtil.getValue("hbase", "rootDir") 16 | 17 | println(res) 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/test/scala/MySQLUtilTest.scala: -------------------------------------------------------------------------------- 1 | import org.apache.spark.{SparkContext, SparkConf} 2 | import util.{XMLUtil, MySQLUtil} 3 | 4 | /** 5 | * Created by li on 16/8/29. 
6 | */ 7 | object MySQLUtilTest { 8 | 9 | def main(args: Array[String]) { 10 | val conf = new SparkConf().setAppName("MySQLUtilTest").setMaster("local") 11 | val sc = new SparkContext(conf) 12 | 13 | val confDir = "/Users/li/Kunyan/workShop/VipStockStatistic/src/main/scala/util/config.xml" 14 | 15 | val stockSql = "select symbol, sename from bt_stcode where (EXCHANGE = '001002' or EXCHANGE = '001003') " + 16 | "and SETYPE = '101' and CUR = 'CNY' and ISVALID = 1 and LISTSTATUS <> '2'" 17 | 18 | val configFile = XMLUtil.readConfigFile(confDir) 19 | 20 | val stockDic = MySQLUtil.readFromMysql(configFile, stockSql) 21 | .map(row => (row._1, row._2.split(","))).toMap 22 | 23 | stockDic.foreach(x => print(x._1, x._2(0))) 24 | 25 | 26 | 27 | 28 | 29 | 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/test/scala/Test.scala: -------------------------------------------------------------------------------- 1 | import org.apache.spark.ml.feature.Word2Vec 2 | import org.apache.spark.sql.SQLContext 3 | import org.apache.spark.{SparkContext, SparkConf} 4 | 5 | 6 | /** 7 | * Created by li on 16/4/15. 8 | * 9 | * 10 | * 11 | * import org.apache.spark.ml.feature.Word2Vec 12 | 13 | */ 14 | object Test { 15 | 16 | def main(args: Array[String]) { 17 | // val setPath = "/Users/li/kunyan/DataSet/trainingsetUnbalance/YSJS.txt" 18 | // val industry = "化工化纤" 19 | // BinaryClassificationRDD.dataOperation(setPath, industry) 20 | val conf = new SparkConf().setAppName("test").setMaster("local") 21 | val sc = new SparkContext(conf) 22 | val sqlContext = new SQLContext(sc) 23 | 24 | 25 | // Input data: Each row is a bag of words from a sentence or document. 26 | val documentDF = sqlContext.createDataFrame(Seq( 27 | "Hi I heard about Spark".split(" "), 28 | "I wish Java could use case classes".split(" "), 29 | "Logistic regression models are neat".split(" ") 30 | ).map(Tuple1.apply)).toDF("text") 31 | 32 | // Learn a mapping from words to Vectors. 33 | val word2Vec = new Word2Vec() 34 | .setInputCol("text") 35 | .setOutputCol("result") 36 | .setVectorSize(3) 37 | .setMinCount(0) 38 | val model = word2Vec.fit(documentDF) 39 | val result = model.transform(documentDF) 40 | result.select("result").foreach(println) 41 | result.show() 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/test/scala/TextRankTest.scala: -------------------------------------------------------------------------------- 1 | import meachinelearning.textrank.{PropertyExtractor, ConstructTextGraph} 2 | import org.graphstream.graph.Node 3 | 4 | import scala.collection.mutable.ListBuffer 5 | import scala.io.Source 6 | 7 | /** 8 | * Created by li on 16/6/23. 
9 | */ 10 | object TextRankTest { 11 | 12 | def main(args: Array[String]) { 13 | 14 | val doc = new ListBuffer[(String)] 15 | 16 | val text = Source.fromURL(getClass.getResource(s"/text/${2}.txt")).getLines().mkString("\n") 17 | text.split(",").foreach(x => doc.+=(x)) 18 | 19 | 20 | // 构建候选关键词图, 设置窗口大小5 21 | val textGraph = new ConstructTextGraph("url", 10, doc.toList).constructGraph 22 | 23 | // 输出构建的无向图的边和顶点 24 | // textGraph.getEdgeSet.toArray.foreach(println) 25 | // textGraph.getNodeSet.toArray.foreach(println) 26 | // assert(textGraph.getEdgeSet.size() > 0) 27 | println((1 to 30).map(i => "=").mkString) 28 | 29 | // 输出提取的关键词 30 | val keywordExtractor = new PropertyExtractor(textGraph, 5) 31 | keywordExtractor.extractKeywords(100, 0.85f).foreach( 32 | node => 33 | println(" 关键词: "+node._1," 得分: "+node._2) 34 | ) 35 | println((1 to 30).map(i => "=").mkString) 36 | 37 | // 获取每个关键词节点的度 38 | textGraph.getNodeSet.toArray.map(_.asInstanceOf[Node]).foreach { 39 | node => 40 | println (node.getId, node.getDegree) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/scala/classification.scala: -------------------------------------------------------------------------------- 1 | import java.io.{BufferedWriter, File, FileWriter} 2 | 3 | import org.apache.spark.{SparkContext, SparkConf} 4 | import org.apache.spark.rdd.RDD 5 | 6 | 7 | /** 8 | * Created by li on 16/3/31. 9 | */ 10 | object classification { 11 | 12 | val conf = new SparkConf().setAppName("meachinelearning/classification").setMaster("local") 13 | val sc = new SparkContext(conf) 14 | 15 | 16 | def getFile(url: String): RDD[(String, String)] ={ 17 | val content = sc.textFile(url).map{ 18 | line => 19 | val data = line.split("\t") 20 | if (data.length > 1) data(0) -> data(1) 21 | }.filter( _ != ()).map(_.asInstanceOf[(String, String)]) 22 | content 23 | } 24 | 25 | 26 | def getTrainingset(catagory: RDD[(String, String)], content: RDD[(String, String)], label: String, dataFile: String): Unit ={ 27 | // val trainingSet = new ArrayBuffer[String ] 28 | val DataFile = new File(dataFile) 29 | val bufferWriter = new BufferedWriter(new FileWriter(DataFile)) 30 | content.map { 31 | line => 32 | catagory.map{ 33 | data => 34 | bufferWriter.write((if(label == line._1) "1" else "0") + "\t" + line._1 + "\t"+ line._2 + "\n") 35 | // val trainingdata = (if(catagory == line._1) "1" else "0") + "\t" + line._1 + "\t"+ line._2 36 | // trainingSet += trainingdata 37 | } 38 | } 39 | bufferWriter.flush() 40 | bufferWriter.close() 41 | 42 | } 43 | 44 | 45 | 46 | // val DataFile = new File(dataFile) 47 | // val bufferWriter = new BufferedWriter(new FileWriter(DataFile)) 48 | // for(item <- list) { 49 | // val cata = map.get(item._1).get 50 | // bufferWriter.write((if(cata == catagory) "1" else "0") + "\t" + cata + "\t"+ item._2 + "\n") 51 | // } 52 | // bufferWriter.flush() 53 | // bufferWriter.close() 54 | // } 55 | 56 | def main(args: Array[String]) { 57 | 58 | // val urlContent = new collection.mutable.HashMap[String , String ] 59 | // val urlCatagory = new ListBuffer[(String, String)] 60 | val catagory1 = "有色金属" 61 | val datafile1 = "/users/li/Downloads/2222.txt" 62 | 63 | val url1 = "/users/li/Downloads/segTraining" 64 | val url2 = "/users/li/Downloads/traininglabel" 65 | 66 | val urlContent = getFile(url1) 67 | val urlCatagory = getFile(url2) 68 | 69 | val res = getTrainingset(urlCatagory, urlContent, catagory1, datafile1) 70 | 71 | } 72 | 73 | 74 | 75 | 76 | 77 | } 78 | 
-------------------------------------------------------------------------------- /src/test/scala/keywordExtractorTest.scala: -------------------------------------------------------------------------------- 1 | 2 | 3 | /** 4 | * Created by li on 16/6/27. 5 | */ 6 | object keywordExtractorTest { 7 | 8 | 9 | def main(args: Array[String]) { 10 | 11 | 12 | val url = "http://anotherbug.blog.chinajavaworld.com/entry/4545/0/" 13 | 14 | println(url.contains("blog")) 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /src/test/scala/telecomDataProcessingTest.scala: -------------------------------------------------------------------------------- 1 | //import org.apache.spark.{SparkConf, SparkContext} 2 | // 3 | //import scala.collection.mutable 4 | // 5 | ///** 6 | // * Created by li on 16/7/20. 7 | // */ 8 | //object TelecomDataProcessingTest { 9 | // 10 | // 11 | // def main(args: Array[String]) { 12 | // 13 | // val conf = new SparkConf().setAppName("test").setMaster("local") 14 | // val sc = new SparkContext(conf) 15 | // 16 | // val setTime = "2016-07-15" 17 | // 18 | // //设置时间段,一小时为一个间隔 19 | // val timeRangeHour = TelecomDataProcessing.setAssignedHourRange(setTime) 20 | // 21 | // // Hdfs上的数据,一天的数据 22 | // val dir = "hdfs://222.73.57.12:9000/telecom/shdx/origin/data/" 23 | // val dataFromHDFS = TelecomDataProcessing.dataReadFromHDFS(sc, dir, setTime).filter(! _._1.contains("home/telecom")) 24 | // 25 | // println("dataFromHDFS结束") 26 | // // dataFromHDFS.foreach(println) 27 | // 28 | // // hbase上的数据 29 | // val confDir = "/Users/li/kunyan/NaturalLanguageProcessing/src/main/scala/util/config.xml" // hbase配置文件目录 30 | // val tableName = "wk_detail" // 表名 31 | // 32 | // val result = new mutable.ArrayBuffer[(String, Array[(String, Long)])] 33 | // 34 | // for (item <- 0 until 1) { 35 | // 36 | // val temp = dataFromHDFS.filter { line => { 37 | // 38 | // (timeRangeHour(item)._1 <= line._1.toLong) && (line._1.toLong <= timeRangeHour(item)._2) 39 | // 40 | // }}.map(_._2) 41 | // 42 | // println("temp读取结束") 43 | // 44 | // temp.foreach(println) 45 | // 46 | // val hBaseConf = TelecomDataProcessing.getHBaseConf(sc, confDir, timeRangeHour(item), tableName) 47 | // 48 | // val newsFromHBase = TelecomDataProcessing.newsReadFromHBase(hBaseConf) 49 | // 50 | // newsFromHBase.foreach(println) 51 | // 52 | // val res = TelecomDataProcessing.urlMatching(temp, newsFromHBase) 53 | // 54 | // result.+=((item.toString, res)) 55 | // 56 | // } 57 | // 58 | // result.toArray.foreach( x => { 59 | // println(x._1) 60 | // x._2.foreach(x => println((x._1, x._2))) 61 | // }) 62 | // 63 | // 64 | // sc.stop() 65 | // 66 | // } 67 | // 68 | //} 69 | -------------------------------------------------------------------------------- /src/test/scala/testRankTest.scala: -------------------------------------------------------------------------------- 1 | 2 | 3 | import meachinelearning.textrank.TextRank 4 | 5 | import scala.collection.mutable.ListBuffer 6 | import scala.io.Source 7 | 8 | /** 9 | * Created by li on 16/6/24. 
10 | */ 11 | object testRankTest { 12 | 13 | def main(args: Array[String]) { 14 | 15 | val doc = new ListBuffer[(String)] 16 | 17 | val text = Source.fromURL(getClass.getResource(s"/text/${2}.txt")).getLines().mkString("\n") 18 | text.split(",").foreach(x => doc.+=(x)) 19 | 20 | val keyWordList = TextRank.run("url", 5, doc.toList, 3, 100, 0.85f) 21 | 22 | keyWordList.foreach { 23 | word => { 24 | println(word._1, word._2) 25 | } 26 | } 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/timeutilTest.scala: -------------------------------------------------------------------------------- 1 | import util.TimeUtil 2 | 3 | /** 4 | * Created by li on 16/7/19. 5 | */ 6 | object TimeUtilTest { 7 | 8 | def main(args: Array[String]) { 9 | 10 | TimeUtil.setAssignedTimeRange("2016-2-1") 11 | 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/test/scala/word2vecTest.scala: -------------------------------------------------------------------------------- 1 | import meachinelearning.word2vec.Word2Vec 2 | import org.apache.spark.{SparkConf, SparkContext} 3 | 4 | /** 5 | * Created by li on 16/7/15. 6 | */ 7 | object word2vecTest { 8 | 9 | 10 | def main(args: Array[String]) { 11 | 12 | 13 | val conf = new SparkConf().setAppName("word2vec").setMaster("local") 14 | val sc = new SparkContext(conf) 15 | 16 | val data = sc.parallelize(List("sadfad\tsdfasdfasdf\tasdfasdfasdfasdfasdf\t中欧,8,美国,成都,;,", "dddddd\tfdasdfvvv\tdfafasfdsadfs\t日本,中欧,.,中国,加州,/,顺分")) 17 | 18 | val punctuation = sc.textFile("/Users/li/kunyan/DataSet/punctuations.txt").collect() 19 | 20 | val s = Word2Vec.formatTransform(data, punctuation) 21 | 22 | s.foreach(println) 23 | 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /target/.history: -------------------------------------------------------------------------------- 1 | all 2 | help sbt 3 | help clean 4 | help clear 5 | exit 6 | -------------------------------------------------------------------------------- /target/resolution-cache/default/classification$sbt_2.10/1.0/resolved.xml.properties: -------------------------------------------------------------------------------- 1 | #default#classification$sbt_2.10;1.0 resolved revisions 2 | #Tue Jul 05 15:26:43 CST 2016 3 | +revision\:\#@\#\:+5.0.4\:\#@\#\:+module\:\#@\#\:+asm-tree\:\#@\#\:+organisation\:\#@\#\:+org.ow2.asm\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=5.0.4 release 5.0.4 null 4 | +revision\:\#@\#\:+1.9.6\:\#@\#\:+module\:\#@\#\:+ant\:\#@\#\:+organisation\:\#@\#\:+org.apache.ant\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.9.6 release 1.9.6 null 5 | +revision\:\#@\#\:+3.0.20\:\#@\#\:+module\:\#@\#\:+plexus-utils\:\#@\#\:+organisation\:\#@\#\:+org.codehaus.plexus\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.0.20 release 3.0.20 null 6 | +revision\:\#@\#\:+1.9.6\:\#@\#\:+module\:\#@\#\:+ant-launcher\:\#@\#\:+organisation\:\#@\#\:+org.apache.ant\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.9.6 release 1.9.6 null 7 | +revision\:\#@\#\:+5.0.4\:\#@\#\:+module\:\#@\#\:+asm\:\#@\#\:+organisation\:\#@\#\:+org.ow2.asm\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=5.0.4 release 5.0.4 null 8 | +revision\:\#@\#\:+2.2.1\:\#@\#\:+module\:\#@\#\:+scalactic_2.10\:\#@\#\:+organisation\:\#@\#\:+org.scalactic\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.2.1 release 2.2.1 null 9 | 
+revision\:\#@\#\:+0.13.8\:\#@\#\:+module\:\#@\#\:+sbt\:\#@\#\:+organisation\:\#@\#\:+org.scala-sbt\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.13.8 release 0.13.8 null 10 | +revision\:\#@\#\:+1.0\:\#@\#\:+module\:\#@\#\:+jsr250-api\:\#@\#\:+organisation\:\#@\#\:+javax.annotation\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.0 release 1.0 null 11 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-reflect\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+info.apiURL\:\#@\#\:+http\://www.scala-lang.org/api/2.10.4/\:\#@\#\:=2.10.4 ? 2.10.4 null 12 | +revision\:\#@\#\:+0.3.0\:\#@\#\:+module\:\#@\#\:+org.eclipse.sisu.plexus\:\#@\#\:+organisation\:\#@\#\:+org.eclipse.sisu\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.3.0 release 0.3.0 null 13 | +revision\:\#@\#\:+1.5.5\:\#@\#\:+module\:\#@\#\:+plexus-component-annotations\:\#@\#\:+organisation\:\#@\#\:+org.codehaus.plexus\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.5.5 release 1.5.5 null 14 | +revision\:\#@\#\:+0.3.0\:\#@\#\:+module\:\#@\#\:+org.eclipse.sisu.inject\:\#@\#\:+organisation\:\#@\#\:+org.eclipse.sisu\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.3.0 release 0.3.0 null 15 | +revision\:\#@\#\:+3.3.3\:\#@\#\:+module\:\#@\#\:+maven-plugin-api\:\#@\#\:+organisation\:\#@\#\:+org.apache.maven\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.3.3 release 3.3.3 null 16 | +revision\:\#@\#\:+1.0\:\#@\#\:+module\:\#@\#\:+cdi-api\:\#@\#\:+organisation\:\#@\#\:+javax.enterprise\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.0 release 1.0 null 17 | +revision\:\#@\#\:+2.5.2\:\#@\#\:+module\:\#@\#\:+plexus-classworlds\:\#@\#\:+organisation\:\#@\#\:+org.codehaus.plexus\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.5.2 release 2.5.2 null 18 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+info.apiURL\:\#@\#\:+http\://www.scala-lang.org/api/2.10.4/\:\#@\#\:=2.10.4 ? 2.10.4 null 19 | +revision\:\#@\#\:+3.3.3\:\#@\#\:+module\:\#@\#\:+maven-model\:\#@\#\:+organisation\:\#@\#\:+org.apache.maven\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.3.3 release 3.3.3 null 20 | +revision\:\#@\#\:+5.0.4\:\#@\#\:+module\:\#@\#\:+asm-commons\:\#@\#\:+organisation\:\#@\#\:+org.ow2.asm\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=5.0.4 release 5.0.4 null 21 | +revision\:\#@\#\:+3.3.3\:\#@\#\:+module\:\#@\#\:+maven-artifact\:\#@\#\:+organisation\:\#@\#\:+org.apache.maven\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.3.3 release 3.3.3 null 22 | +revision\:\#@\#\:+1.6.0\:\#@\#\:+module\:\#@\#\:+jarjar\:\#@\#\:+organisation\:\#@\#\:+org.pantsbuild\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.6.0 release 1.6.0 null 23 | +revision\:\#@\#\:+1\:\#@\#\:+module\:\#@\#\:+javax.inject\:\#@\#\:+organisation\:\#@\#\:+javax.inject\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1 release 1 null 24 | +sbtVersion\:\#@\#\:+0.13\:\#@\#\:+revision\:\#@\#\:+0.14.1\:\#@\#\:+module\:\#@\#\:+sbt-assembly\:\#@\#\:+organisation\:\#@\#\:+com.eed3si9n\:\#@\#\:+scalaVersion\:\#@\#\:+2.10\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.14.1 ? 
0.14.1 null 25 | -------------------------------------------------------------------------------- /target/resolution-cache/default/classification$sbt_2.10/1.0/resolved.xml.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /target/resolution-cache/default/classification_2.10/1.0/resolved.xml.properties: -------------------------------------------------------------------------------- 1 | #default#classification_2.10;1.0 resolved revisions 2 | #Fri Jun 24 11:03:35 CST 2016 3 | +revision\:\#@\#\:+1.5.2\:\#@\#\:+module\:\#@\#\:+spark-graphx_2.10\:\#@\#\:+organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.5.2 release 1.5.2 null 4 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 ? 2.10.5 null 5 | +revision\:\#@\#\:+2.2.5\:\#@\#\:+module\:\#@\#\:+scalatest_2.10\:\#@\#\:+organisation\:\#@\#\:+org.scalatest\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.2.5 release 2.2.5 null 6 | +revision\:\#@\#\:+3.1.14\:\#@\#\:+module\:\#@\#\:+mysql-connector-java\:\#@\#\:+organisation\:\#@\#\:+mysql\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.1.14 release 3.1.14 null 7 | +revision\:\#@\#\:+2.2.5\:\#@\#\:+module\:\#@\#\:+scalactic_2.10\:\#@\#\:+organisation\:\#@\#\:+org.scalactic\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.2.5 release 2.2.5 null 8 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-compiler\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 release 2.10.4 null 9 | +revision\:\#@\#\:+1.5.2\:\#@\#\:+module\:\#@\#\:+spark-mllib_2.10\:\#@\#\:+organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.5.2 release 1.5.2 null 10 | +revision\:\#@\#\:+1.1.2\:\#@\#\:+module\:\#@\#\:+gs-core\:\#@\#\:+organisation\:\#@\#\:+org.graphstream\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.1.2 release 1.1.2 null 11 | +revision\:\#@\#\:+2.7.1\:\#@\#\:+module\:\#@\#\:+hadoop-common\:\#@\#\:+organisation\:\#@\#\:+org.apache.hadoop\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.7.1 release 2.7.1 null 12 | +revision\:\#@\#\:+1.5.2\:\#@\#\:+module\:\#@\#\:+spark-core_2.10\:\#@\#\:+organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.5.2 release 1.5.2 null 13 | -------------------------------------------------------------------------------- /target/resolution-cache/default/classification_2.10/1.0/resolved.xml.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 9 | 10 | classification 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /target/resolution-cache/default/naturallanguageprocessing$sbt_2.10/1.0/resolved.xml.properties: -------------------------------------------------------------------------------- 1 | #default#naturallanguageprocessing$sbt_2.10;1.0 resolved revisions 2 | #Thu Mar 23 16:16:57 CST 2017 3 | 
+revision\:\#@\#\:+5.0.4\:\#@\#\:+module\:\#@\#\:+asm-tree\:\#@\#\:+organisation\:\#@\#\:+org.ow2.asm\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=5.0.4 release 5.0.4 null 4 | +revision\:\#@\#\:+1.9.6\:\#@\#\:+module\:\#@\#\:+ant\:\#@\#\:+organisation\:\#@\#\:+org.apache.ant\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.9.6 release 1.9.6 null 5 | +revision\:\#@\#\:+3.0.20\:\#@\#\:+module\:\#@\#\:+plexus-utils\:\#@\#\:+organisation\:\#@\#\:+org.codehaus.plexus\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.0.20 release 3.0.20 null 6 | +revision\:\#@\#\:+1.9.6\:\#@\#\:+module\:\#@\#\:+ant-launcher\:\#@\#\:+organisation\:\#@\#\:+org.apache.ant\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.9.6 release 1.9.6 null 7 | +revision\:\#@\#\:+5.0.4\:\#@\#\:+module\:\#@\#\:+asm\:\#@\#\:+organisation\:\#@\#\:+org.ow2.asm\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=5.0.4 release 5.0.4 null 8 | +revision\:\#@\#\:+2.2.1\:\#@\#\:+module\:\#@\#\:+scalactic_2.10\:\#@\#\:+organisation\:\#@\#\:+org.scalactic\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.2.1 release 2.2.1 null 9 | +revision\:\#@\#\:+0.13.8\:\#@\#\:+module\:\#@\#\:+sbt\:\#@\#\:+organisation\:\#@\#\:+org.scala-sbt\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.13.8 release 0.13.8 null 10 | +revision\:\#@\#\:+1.0\:\#@\#\:+module\:\#@\#\:+jsr250-api\:\#@\#\:+organisation\:\#@\#\:+javax.annotation\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.0 release 1.0 null 11 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-reflect\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+info.apiURL\:\#@\#\:+http\://www.scala-lang.org/api/2.10.4/\:\#@\#\:=2.10.4 ? 2.10.4 null 12 | +revision\:\#@\#\:+0.3.0\:\#@\#\:+module\:\#@\#\:+org.eclipse.sisu.plexus\:\#@\#\:+organisation\:\#@\#\:+org.eclipse.sisu\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.3.0 release 0.3.0 null 13 | +revision\:\#@\#\:+1.5.5\:\#@\#\:+module\:\#@\#\:+plexus-component-annotations\:\#@\#\:+organisation\:\#@\#\:+org.codehaus.plexus\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.5.5 release 1.5.5 null 14 | +revision\:\#@\#\:+0.3.0\:\#@\#\:+module\:\#@\#\:+org.eclipse.sisu.inject\:\#@\#\:+organisation\:\#@\#\:+org.eclipse.sisu\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.3.0 release 0.3.0 null 15 | +revision\:\#@\#\:+3.3.3\:\#@\#\:+module\:\#@\#\:+maven-plugin-api\:\#@\#\:+organisation\:\#@\#\:+org.apache.maven\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.3.3 release 3.3.3 null 16 | +revision\:\#@\#\:+1.0\:\#@\#\:+module\:\#@\#\:+cdi-api\:\#@\#\:+organisation\:\#@\#\:+javax.enterprise\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.0 release 1.0 null 17 | +revision\:\#@\#\:+2.5.2\:\#@\#\:+module\:\#@\#\:+plexus-classworlds\:\#@\#\:+organisation\:\#@\#\:+org.codehaus.plexus\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.5.2 release 2.5.2 null 18 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:+info.apiURL\:\#@\#\:+http\://www.scala-lang.org/api/2.10.4/\:\#@\#\:=2.10.4 ? 
19 | +revision\:\#@\#\:+3.3.3\:\#@\#\:+module\:\#@\#\:+maven-model\:\#@\#\:+organisation\:\#@\#\:+org.apache.maven\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.3.3 release 3.3.3 null
20 | +revision\:\#@\#\:+5.0.4\:\#@\#\:+module\:\#@\#\:+asm-commons\:\#@\#\:+organisation\:\#@\#\:+org.ow2.asm\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=5.0.4 release 5.0.4 null
21 | +revision\:\#@\#\:+3.3.3\:\#@\#\:+module\:\#@\#\:+maven-artifact\:\#@\#\:+organisation\:\#@\#\:+org.apache.maven\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.3.3 release 3.3.3 null
22 | +revision\:\#@\#\:+1.6.0\:\#@\#\:+module\:\#@\#\:+jarjar\:\#@\#\:+organisation\:\#@\#\:+org.pantsbuild\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.6.0 release 1.6.0 null
23 | +revision\:\#@\#\:+1\:\#@\#\:+module\:\#@\#\:+javax.inject\:\#@\#\:+organisation\:\#@\#\:+javax.inject\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1 release 1 null
24 | +sbtVersion\:\#@\#\:+0.13\:\#@\#\:+revision\:\#@\#\:+0.14.1\:\#@\#\:+module\:\#@\#\:+sbt-assembly\:\#@\#\:+organisation\:\#@\#\:+com.eed3si9n\:\#@\#\:+scalaVersion\:\#@\#\:+2.10\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=0.14.1 ? 0.14.1 null
25 |
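Line 24 records the meta-build's plugin resolution: sbt-assembly 0.14.1 from com.eed3si9n, cross-built for sbt 0.13 and Scala 2.10, resolved under sbt 0.13.8. Below is a minimal sketch of the project/plugins.sbt declaration that such a resolution would normally come from; whether the project's actual plugins.sbt contains anything beyond this is not visible in this dump.

// project/plugins.sbt (sketch): the assembly plugin recorded in the resolution above.
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.1")

// project/build.properties would pin the launcher to the resolved sbt release:
// sbt.version=0.13.8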
--------------------------------------------------------------------------------
/target/resolution-cache/default/naturallanguageprocessing$sbt_2.10/1.0/resolved.xml.xml:
--------------------------------------------------------------------------------
(Ivy resolution descriptor for the naturallanguageprocessing meta-build; the XML markup was stripped when this dump was rendered, so no content survives here.)
--------------------------------------------------------------------------------
/target/resolution-cache/meachinelearning-classification/meachinelearning-classification$sbt_2.10/1.0/resolved.xml.xml:
--------------------------------------------------------------------------------
(Ivy resolution descriptor for the meachinelearning-classification meta-build; the XML markup was stripped when this dump was rendered, so no content survives here.)
--------------------------------------------------------------------------------
/target/resolution-cache/meachinelearning-classification/meachinelearning-classification_2.10/1.0/resolved.xml.properties:
--------------------------------------------------------------------------------
1 | #meachinelearning-classification#meachinelearning-classification_2.10;1.0 resolved revisions
2 | #Thu Jul 07 14:51:12 CST 2016
3 | +revision\:\#@\#\:+1.5.2\:\#@\#\:+module\:\#@\#\:+spark-graphx_2.10\:\#@\#\:+organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.5.2 release 1.5.2 null
4 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-library\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 ? 2.10.5 null
5 | +revision\:\#@\#\:+2.2.5\:\#@\#\:+module\:\#@\#\:+scalatest_2.10\:\#@\#\:+organisation\:\#@\#\:+org.scalatest\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.2.5 release 2.2.5 null
6 | +revision\:\#@\#\:+3.1.14\:\#@\#\:+module\:\#@\#\:+mysql-connector-java\:\#@\#\:+organisation\:\#@\#\:+mysql\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=3.1.14 release 3.1.14 null
7 | +revision\:\#@\#\:+2.2.5\:\#@\#\:+module\:\#@\#\:+scalactic_2.10\:\#@\#\:+organisation\:\#@\#\:+org.scalactic\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.2.5 release 2.2.5 null
8 | +revision\:\#@\#\:+2.10.4\:\#@\#\:+module\:\#@\#\:+scala-compiler\:\#@\#\:+organisation\:\#@\#\:+org.scala-lang\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.10.4 release 2.10.4 null
9 | +revision\:\#@\#\:+1.5.2\:\#@\#\:+module\:\#@\#\:+spark-mllib_2.10\:\#@\#\:+organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.5.2 release 1.5.2 null
10 | +revision\:\#@\#\:+1.1.2\:\#@\#\:+module\:\#@\#\:+gs-core\:\#@\#\:+organisation\:\#@\#\:+org.graphstream\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.1.2 release 1.1.2 null
11 | +revision\:\#@\#\:+2.7.1\:\#@\#\:+module\:\#@\#\:+hadoop-common\:\#@\#\:+organisation\:\#@\#\:+org.apache.hadoop\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=2.7.1 release 2.7.1 null
12 | +revision\:\#@\#\:+1.5.2\:\#@\#\:+module\:\#@\#\:+spark-core_2.10\:\#@\#\:+organisation\:\#@\#\:+org.apache.spark\:\#@\#\:+branch\:\#@\#\:+@\#\:NULL\:\#@\:\#@\#\:=1.5.2 release 1.5.2 null
13 |
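Both classification modules resolve the same library set: Spark 1.5.2 (core, mllib, graphx) on Scala 2.10.4, ScalaTest and Scalactic 2.2.5 in the test scope, mysql-connector-java 3.1.14, gs-core 1.1.2 and hadoop-common 2.7.1. A minimal build.sbt sketch of dependency declarations that would yield these resolutions follows; the project's real build.sbt declares more than this (see the input_dsp listing further down), so treat it as an excerpt written under assumptions rather than the actual build file.

// build.sbt (sketch): coordinates taken from the resolved revisions above.
scalaVersion := "2.10.4"

libraryDependencies ++= Seq(
  "org.apache.spark"  %% "spark-core"           % "1.5.2",
  "org.apache.spark"  %% "spark-mllib"          % "1.5.2",
  "org.apache.spark"  %% "spark-graphx"         % "1.5.2",
  "org.apache.hadoop" %  "hadoop-common"        % "2.7.1",
  "mysql"             %  "mysql-connector-java" % "3.1.14",
  "org.graphstream"   %  "gs-core"              % "1.1.2",
  "org.scalactic"     %% "scalactic"            % "2.2.5" % "test",
  "org.scalatest"     %% "scalatest"            % "2.2.5" % "test"
)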
--------------------------------------------------------------------------------
/target/resolution-cache/meachinelearning-classification/meachinelearning-classification_2.10/1.0/resolved.xml.xml:
--------------------------------------------------------------------------------
(Ivy resolution descriptor; the XML markup was stripped when this dump was rendered, only the module description "MeachineLearning/classification" survives.)
--------------------------------------------------------------------------------
/target/resolution-cache/reports/ (Ivy resolve reports; the XML bodies were stripped when this dump was rendered, so only the file names are recoverable):
--------------------------------------------------------------------------------
default-classification$sources_2.10-docs.xml
default-classification$sources_2.10-optional.xml
default-classification$sources_2.10-plugin.xml
default-classification$sources_2.10-pom.xml
default-classification$sources_2.10-provided.xml
default-classification$sources_2.10-sources.xml
default-classification_2.10-docs.xml
default-classification_2.10-optional.xml
default-classification_2.10-plugin.xml
default-classification_2.10-pom.xml
default-classification_2.10-provided.xml
default-classification_2.10-sources.xml
meachinelearning-classification-meachinelearning-classification$sources_2.10-docs.xml
meachinelearning-classification-meachinelearning-classification$sources_2.10-optional.xml
meachinelearning-classification-meachinelearning-classification$sources_2.10-plugin.xml
meachinelearning-classification-meachinelearning-classification$sources_2.10-pom.xml
meachinelearning-classification-meachinelearning-classification$sources_2.10-provided.xml
meachinelearning-classification-meachinelearning-classification$sources_2.10-sources.xml
meachinelearning-classification-meachinelearning-classification_2.10-docs.xml
meachinelearning-classification-meachinelearning-classification_2.10-optional.xml
meachinelearning-classification-meachinelearning-classification_2.10-plugin.xml
meachinelearning-classification-meachinelearning-classification_2.10-pom.xml
meachinelearning-classification-meachinelearning-classification_2.10-provided.xml
meachinelearning-classification-meachinelearning-classification_2.10-sources.xml
--------------------------------------------------------------------------------
/target/scala-2.10/test-classes/text/1.txt:
--------------------------------------------------------------------------------
1 | 光伏,中国人民银行,列,入,绿色,债券,支援,专案,目录,2015年12月22日,19:00:00,中国人民银行,发布,2015,第39,号,公告,公告,称为,加快,建设生态文明,引导,金融机构,服务,绿色发展,推动,经济结构转型,升级,经济发展方式转变,支援,金融机构,发行,绿色,金融债券,募集资金,支援,绿色,产业发展,笔者,目录,第5,项,清洁能源,发电,中,风力发电,光伏发电,智慧,电网,能源,因特网,分布式能源,太阳能热利用,水力发电,新能源,利用,列,入,太阳能光伏发电站,太阳能,高,温热,发电站,不含,分布式,太阳能光伏发电,系统,需,限定,条件,多晶硅,电池,组件,光电,转化,效率,≥,15.5%,组件,专案,投产,运行,日,一年,衰减率,≤,2.5%,年,衰减率,≤,0.7%,单晶硅,电池,组件,光电,转化,效率,≥,16%,组件,专案,投产,运行,日,一年,衰减率,≤,3%,年,衰减率,≤,0.7%,高,倍,聚光光伏,组件,光电,转化,效率,≥,28%,项目,投产,运行,日,一年,衰减率,≤,2%,年,衰减率,≤,0.5%,项目全生命周期,衰减率,≤,10%,硅基,薄膜电池,组件,光电,转化,效率,≥,8%,铜铟镓硒,CIGS,薄膜电池,组件,光电,转化,效率,≥,11%,碲化镉,CdTe,薄膜电池,组件,光电,转化,效率,≥,11%,薄膜电池,组件,光电,转化,效率,≥,10%,多晶硅,单晶硅,薄膜电池,项目全生命周期,衰减率,≤,20%,智能电网,能源,因特网,指,提高,供,需,负荷,平衡,回应,能力,改善,电网,综合,能效,降低,输变电,损耗,增强,可再生能源,接,入,能力,电网建设,运营,技术,升级,改造,专案,1.,智能电网,指,采用,智慧,型,电气设备,即时,双向,集成,通信技术,先进技术,电网建设,运营,专案,电网,智慧,化,升级,改造,项目,2.,能源,因特网,指,综合,电力电子,资讯,智慧,管理技术,连接,分布式能源,含,分布式,可再生能源,分布式,储能,装置,类型,负荷,能量,双向,流动,交换,共享,电网,微电网,能源,燃气,网络,设施,建设,运营,专案,分布式能源,指,区域,能源站,包括,天然气,区域,能源站,分布式光伏发电,系统,分布式能源,设施,建设,运营,分布式能源,接,入,峰谷,调节,系统,分布式,电力,交易平台,能源管理系统,建设,运营,附,中国人民银行公告,2015,第39,号,绿色,债券,支援,专案,目录
--------------------------------------------------------------------------------
/target/scala-2.10/test-classes/text/2.txt:
--------------------------------------------------------------------------------
1 | 记者,国家电网公司,获悉,9月23日,河北丰宁,二期,山东文登,重庆,蟠龙,抽水蓄能电站,工程,以下简称,丰宁,二期,文登,蟠龙,抽,蓄,座,抽,蓄,电站,正式,开工,总投资,244.4亿,元,总装机容量,480万,千瓦,计划,2022年,竣工,投产,项目,预计,增加,发电,装备制造业,产值,111亿,元,推动,相关,装备制造业,发展,开工,动员大会,国家电网公司,董事长,党组书记,刘振亚,丰宁,二期,文登,蟠龙,抽,蓄,国家电网公司,推进,特高压电网,建设,服务,清洁能源,发展,重大工程,继,2015年6月,安徽金寨,山东沂蒙,河南,天池,座,抽水蓄能电站,第二批,开工,电站,标志,我国,抽水蓄能电站,加快,发展,新,阶段,介绍,河北丰宁,二期,抽水蓄能电站,项目,位于,河北省承德市,丰宁县,装机容量,180万,千瓦,安装,台,30万,千瓦,可逆,式,水轮发电机组,500,千伏,电压,接,入,华北电网,工程投资,87.5亿,元,丰宁抽水蓄能电站,一期,二期,装机容量,360万,千瓦,世界上,装机容量,抽水蓄能电站,山东,文登抽水蓄能电站,位于,山东省,威海市文登区,装机容量,180万,千瓦,安装,台,30万,千瓦,可逆,式,水轮发电机组,500,千伏,电压,接,入,山东电网,工程投资,85.7亿,元,重庆,蟠龙,抽水蓄能电站,位于,重庆市綦江区,装机容量,120万,千瓦,安装,台,30万,千瓦,可逆,式,水轮发电机组,500,千伏,电压,接,入,重庆电网,工程投资,71.2亿,元,国网,座,受,端,电网,地区,抽水蓄能电站,建成,更好地,接纳,区,外,来电,优化,电源,结构,提高,北,西南,地区,清洁能源,消纳,能力,提高,特高压电网,系统安全,可靠性,综合,煤电,机组,消纳,清洁能源,效果,建设,丰宁,二期,文登,蟠龙,抽,蓄,年,节约,原煤,消耗,291万,吨,减排,烟尘,0.3万,吨,二氧化硫,1.4万,吨,氮氧化物,1.3万,吨,二氧化碳,485万,吨,节能减排,大气污染防治,国家电网公司,经营,区域,内在,运,抽水蓄能电站,装机容量,1674.5万,千瓦,建,规模,1880万,千瓦,预计,2017年,我国,抽水蓄能,装机,3300万,千瓦,超过,美国,世界上,抽水蓄能电站,第一,大国
--------------------------------------------------------------------------------
/target/scala-2.10/test-classes/text/abstract:
--------------------------------------------------------------------------------
1 | 算法可大致分为基本算法、数据结构的算法、数论算法、计算几何的算法、图的算法、动态规划以及数值分析、加密算法、排序算法、检索算法、随机化算法、并行算法、厄米变形模型、随机森林算法。
2 | 算法可以宽泛的分为三类,
3 | 一,有限的确定性算法,这类算法在有限的一段时间内终止。他们可能要花很长时间来执行指定的任务,但仍将在一定的时间内终止。这类算法得出的结果常取决于输入值。
4 | 二,有限的非确定算法,这类算法在有限的时间内终止。然而,对于一个(或一些)给定的数值,算法的结果并不是唯一的或确定的。
5 | 三,无限的算法,是那些由于没有定义终止定义条件,或定义的条件无法由输入的数据满足而不终止运行的算法。通常,无限算法的产生是由于未能确定的定义终止条件。
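The files under target/scala-2.10/test-classes/text are pre-segmented test corpora: 1.txt and 2.txt each hold one document as a single line of comma-separated tokens produced by Chinese word segmentation, and "abstract" holds a few lines of plain prose. Below is a hedged Scala sketch of how such a token file could be loaded and hashed into TF-IDF vectors with the Spark MLlib release resolved above (1.5.2); the object name, master setting and file path are illustrative and do not come from the project's sources.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.feature.{HashingTF, IDF}

// Illustrative only: reads a comma-separated token file such as
// target/scala-2.10/test-classes/text/1.txt and turns it into sparse TF-IDF
// vectors, the usual MLlib 1.5.x preprocessing step before training a classifier.
object TokenFileFeatures {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("token-file-features").setMaster("local[*]"))

    // One document per line; the tokens are already segmented and joined with ','.
    val docs = sc.textFile("target/scala-2.10/test-classes/text/1.txt")
      .map(_.split(",").toSeq)

    val tf = new HashingTF(numFeatures = 1 << 18).transform(docs)
    tf.cache()
    val tfidf = new IDF().fit(tf).transform(tf)

    tfidf.take(1).foreach(println)
    sc.stop()
  }
}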
--------------------------------------------------------------------------------
/target/streams/$global/$global/dumpStructure/$global/streams/out:
--------------------------------------------------------------------------------
1 | [info] Writing structure to /private/var/folders/7j/trxrd6ms0rg3v8tlck57__4h0000gn/T/sbt-structure0.xml...
2 | [info] Done.
3 |
--------------------------------------------------------------------------------
/target/streams/$global/clean/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/target/streams/$global/clean/$global/streams/out
--------------------------------------------------------------------------------
/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/input_dsp:
--------------------------------------------------------------------------------
1 | org.scala-lang scala-library 2.10.4
2 | com.kunyan nlpsuit-package 0.2.8.3
    org.scalactic scalactic 2.2.5 test
    org.scalatest scalatest 2.2.5 test
    org.scala-lang scala-compiler 2.10.4
    org.apache.hadoop hadoop-common 2.7.1 javax.servlet * *
    org.apache.hadoop hadoop-hdfs 2.7.1 provided
    org.apache.spark spark-core_2.10 1.5.2
    org.apache.spark spark-mllib_2.10 1.5.2
    mysql mysql-connector-java 3.1.14
    org.graphstream gs-core 1.1.2
    org.apache.spark spark-graphx_2.10 1.5.2
    com.ibm.icu icu4j 56.1
    org.apache.hbase hbase 0.98.2-hadoop2
    org.apache.hbase hbase-client 1.1.2
    org.apache.hbase hbase-common 1.1.2
    org.apache.hbase hbase-server 1.1.2
    org.scalanlp breeze-math_2.10 0.4
    org.scalanlp breeze-process_2.10 0.3
    org.scalanlp breeze-viz_2.10 0.12
    org.scalanlp breeze_2.10 *
    org.scalanlp nak_2.10 1.3
    redis.clients jedis 2.8.0
    org.ansj ansj_seg 5.0.2
    org.json json 20160212
    org.nlpcn nlp-lang 1.7
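The input_dsp cache records the dependency declarations sbt was given, including details the resolved.xml.properties files do not show: a javax.servlet exclusion on hadoop-common, hadoop-hdfs in the provided scope, the test scope on ScalaTest and Scalactic, and the NLP stack (com.kunyan nlpsuit-package, HBase, breeze, nak, jedis, ansj_seg, nlp-lang). The fragment below is a hedged sketch of how the exclusion and scope entries would typically be written in build.sbt; only a few of the listed artifacts are shown, and the exact form in the project's build file may differ.

// build.sbt (sketch): the exclusion and scope flags visible in input_dsp above.
libraryDependencies ++= Seq(
  // hadoop-common 2.7.1 with javax.servlet artifacts excluded (the "javax.servlet * *" entry)
  "org.apache.hadoop" % "hadoop-common" % "2.7.1" excludeAll ExclusionRule(organization = "javax.servlet"),
  // hadoop-hdfs 2.7.1 only on the provided classpath
  "org.apache.hadoop" % "hadoop-hdfs" % "2.7.1" % "provided",
  // part of the Chinese NLP stack recorded in the cache
  "org.ansj"      % "ansj_seg" % "5.0.2",
  "org.nlpcn"     % "nlp-lang" % "1.7",
  "redis.clients" % "jedis"    % "2.8.0"
)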
--------------------------------------------------------------------------------
/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/output_dsp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/target/streams/$global/dependencyPositions/$global/streams/update_cache_2.10/output_dsp
--------------------------------------------------------------------------------
/target/streams/$global/ivyConfiguration/$global/streams/out:
--------------------------------------------------------------------------------
1 | [debug] Other repositories:
2 | [debug] Default repositories:
3 | [debug] Using inline dependencies specified in Scala.
4 |
--------------------------------------------------------------------------------
/target/streams/$global/ivySbt/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/target/streams/$global/ivySbt/$global/streams/out
--------------------------------------------------------------------------------
/target/streams/$global/projectDescriptors/$global/streams/out:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/target/streams/$global/projectDescriptors/$global/streams/out
--------------------------------------------------------------------------------
/target/streams/$global/update/$global/streams/update_cache_2.10/inputs:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/target/streams/$global/update/$global/streams/update_cache_2.10/inputs
--------------------------------------------------------------------------------
/target/streams/$global/update/$global/streams/update_cache_2.10/output:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/STHSF/NaturalLanguageProcessing_Spark/43d4b89910f169879606affec6cf04defb6603e2/target/streams/$global/update/$global/streams/update_cache_2.10/output
--------------------------------------------------------------------------------
/target/streams/compile/unmanagedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/target/streams/compile/unmanagedJars/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/target/streams/runtime/unmanagedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/target/streams/runtime/unmanagedJars/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/target/streams/test/unmanagedClasspath/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/target/streams/test/unmanagedJars/$global/streams/export:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------