├── README.md
└── com.homework
    ├── .classpath
    ├── .classpath.bak
    ├── .gitignore
    ├── .gitignore.bak
    ├── .project
    ├── .settings
        ├── org.eclipse.core.resources.prefs
        ├── org.eclipse.jdt.core.prefs
        └── org.eclipse.m2e.core.prefs
    ├── datafile
        ├── association
        │   ├── cnItems.dat
        │   ├── fpg
        │   ├── fpg2
        │   ├── items
        │   └── user2items.csv
        ├── cluster
        │   ├── data.csv
        │   └── simple_k-means.txt
        ├── decisiontree
        │   ├── test
        │   │   └── in
        │   │   │   └── weather.nominal.arff
        │   └── train
        │   │   └── in
        │   │       └── weather.nominal.arff
        ├── hosts
        ├── hosts.txt
        ├── naivebayes
        │   ├── test
        │   │   └── in
        │   │   │   └── test.arff
        │   └── train
        │   │   ├── in
        │   │       └── weather.nominal.arff
        │   │   └── out
        │   │       └── trainresult.arff
        ├── week5
        │   ├── Ma
        │   ├── Mb
        │   ├── SparseMatrix
        │   │   ├── a.txt
        │   │   └── b.txt
        │   ├── a.txt
        │   ├── b.txt
        │   ├── small.csv
        │   ├── small2.csv
        │   └── test
        │   │   ├── Ma
        │   │   └── Mb
        └── week6
        │   ├── job.csv
        │   └── pv.csv
    ├── lib
        ├── je-analysis-1.5.1.jar
        ├── lucene-core-2.3.0.jar
        ├── lucene-core-3.1.0.jar
        ├── paoding-analysis.jar
        └── 说明
    ├── pom.xml
    ├── scripts
        ├── clustering
        │   └── canopy
        │   │   ├── canopy-mahout.txt
        │   │   └── canopy.dat
        ├── fp-growth
        │   ├── fpg-mahout.txt
        │   └── fpg.txt
        ├── hive
        │   ├── HiveJDBC.java
        │   └── sql.hive
        ├── week10
        │   ├── 1.pig
        │   ├── common_friend.pig
        │   ├── karate.csv
        │   ├── w10.pig
        │   ├── 杂文件
        │   │   ├── common_prj.java.bak
        │   │   ├── karate2.csv
        │   │   ├── karate2.csv.bak
        │   │   ├── mytest.txt
        │   │   ├── noway
        │   │   └── tes2.txt
        │   └── 计算33的好友推荐(不关注别人的没有推荐)
        │   │   ├── common.java
        │   │   ├── common.java.bak
        │   │   ├── common_flt.java
        │   │   ├── common_flt.java.bak
        │   │   ├── common_grp.java
        │   │   ├── common_jnd.java
        │   │   ├── common_prj.java
        │   │   ├── pig.pig
        │   │   └── user.java
        ├── week13
        │   └── week13
        ├── week8.rar
        ├── week8
        │   ├── homework.txt
        │   ├── week8.pig
        │   └── week8.txt
        └── week9
        │   └── pagerank.r
    └── src
        ├── common
            └── com
            │   └── homework
            │       └── hdfs
            │           ├── HdfsDAO.java
            │           └── package-info.java
        ├── hadoop
            └── machinelearning
            │   └── clustering
            │       └── hadoop
            │           └── machinelearning
            │               └── clustering
            │                   ├── canopy
            │                       └── package-info.java
            │                   └── kmeans
            │                       ├── KmeansHadoop.java
            │                       └── package-info.java
        ├── main
            └── java
            │   └── com
            │       └── homework
            │           └── App.java
        ├── mommon
            ├── com
            │   └── homework
            │   │   └── mommon
            │   │       ├── ComTest.java
            │   │       └── package-info.java
            └── mytest
            │   ├── MenuTree.java
            │   ├── Node.java
            │   ├── Recursive.java
            │   └── package-info.java
        ├── sequence
            └── machinelearning
            │   ├── association
            │       └── sequence
            │       │   └── machinelearning
            │       │       └── association
            │       │           ├── apriori
            │       │               ├── ItemMap.java
            │       │               ├── MyApriori.java
            │       │               ├── Subset.java
            │       │               └── package-info.java
            │       │           ├── common
            │       │               ├── Definition.java
            │       │               ├── Mytest.java
            │       │               ├── ReadData.java
            │       │               ├── SortTest.java
            │       │               ├── Transaction.java
            │       │               └── package-info.java
            │       │           ├── fpgrowth
            │       │               ├── Myfptree2.java
            │       │               ├── TreeNode2.java
            │       │               └── package-info.java
            │       │           ├── fpgtest
            │       │               ├── FPTree.java
            │       │               ├── TreeNode.java
            │       │               └── package-info.java
            │       │           └── otherdemo
            │       │               ├── Apriori.java
            │       │               ├── Apriori_1.java
            │       │               ├── Apriori_NathanMagnus.java
            │       │               └── package-info.java
            │   ├── clustering
            │       └── sequence
            │       │   └── machinelearning
            │       │       └── clustering
            │       │           ├── canopy
            │       │               ├── MyCanopy.java
            │       │               ├── Point.java
            │       │               ├── UserPoint.java
            │       │               └── package-info.java
            │       │           └── kmeans
            │       │               ├── MyKmeans.java
            │       │               ├── MyKmeansForUser.java
            │       │               └── package-info.java
            │   ├── decisiontree
            │       └── sequence
            │       │   └── machinelearning
            │       │       └── decisiontree
            │       │           ├── c45
            │       │               ├── DecisionTreeNode.java
            │       │               ├── DecisionTreeUtil.java
            │       │               ├── SequenceComparator.java
            │       │               ├── c4.java
            │       │               └── package-info.java
            │       │           ├── id3
            │       │               ├── DicisionTree.java
            │       │               ├── OtherID3.java
            │       │               └── package-info.java
            │       │           ├── id3test
            │       │               ├── DTreeUtil.java
            │       │               ├── ID3.java
            │       │               ├── SequenceComparator.java
            │       │               ├── TreeNode.java
            │       │               └── package-info.java
            │       │           ├── myc45
            │       │               └── package-info.java
            │       │           └── myid3
            │       │               ├── Maxgain.java
            │       │               ├── MyID3.java
            │       │               ├── Point.java
            │       │               ├── TheMath.java
            │       │               ├── TreeNode.java
            │       │               └── package-info.java
            │   └── naivebayes
            │       └── sequence
            │           └── machinelearning
            │               └── naivebayes
            │                   ├── bayesdemo
            │                       ├── Main.java
            │                       ├── Test.java
            │                       ├── Train.java
            │                       └── package-info.java
            │                   └── textmining
            │                       ├── ParticipleTest.java
            │                       └── package-info.java
        ├── test
            └── java
            │   └── com
            │       └── homework
            │           └── AppTest.java
        ├── week2
            ├── business
            │   ├── DayIp.java
            │   ├── StatPV.java
            │   └── package-info.java
            └── entity
            │   ├── Kpi.java
            │   └── package-info.java
        ├── week3
            ├── mine
            │   ├── Outinfo.java
            │   ├── StationInfo.java
            │   ├── StayTime.java
            │   ├── StayTime2.java
            │   ├── StayTime2改造前备份.rar
            │   ├── my.net
            │   ├── my.pos
            │   └── package-info.java
            └── tutorial
            │   ├── BaseStationDataPreprocess.java
            │   ├── TableLine.java
            │   └── package-info.java
        ├── week5
            ├── matrix
            │   ├── Bigmmult.java
            │   ├── MatrixMult.java
            │   ├── Multiply.java
            │   ├── MyTest.java
            │   ├── Recommend.java
            │   ├── SparseMatrix.java
            │   └── package-info.java
            └── recommend
            │   ├── MainPodium.java
            │   ├── Step1.java
            │   ├── Step2.java
            │   ├── Step3.java
            │   ├── Step4.java
            │   └── package-info.java
        ├── week6
            ├── filterSalary
            │   ├── Main.java
            │   ├── Step0.java
            │   ├── Step1.java
            │   ├── Step2.java
            │   ├── Step3.java
            │   └── package-info.java
            ├── recommendJob
            │   ├── ItemLoglikelihood.java
            │   ├── UserCityBlock.java
            │   └── package-info.java
            └── test
            │   └── package-info.java
        ├── week7
            ├── classfier
            │   ├── Main.java
            │   ├── PaodingFirst.java
            │   ├── PaodingTest.java
            │   └── package-info.java
            ├── dic
            │   ├── .compiled
            │   │   └── most-words-mode
            │   │   │   ├── .metadata
            │   │   │   ├── vocabulary.dic.compiled
            │   │   │   ├── x-confucian-family-name.dic.compiled
            │   │   │   ├── x-for-combinatorics.dic.compiled
            │   │   │   ├── x-noise-charactor.dic.compiled
            │   │   │   ├── x-noise-word.dic.compiled
            │   │   │   └── x-unit.dic.compiled
            │   ├── administrative.dic
            │   ├── appellation.dic
            │   ├── company.dic
            │   ├── comupter-science.dic
            │   ├── contemporary-words.dic
            │   ├── division
            │   │   ├── africa.dic
            │   │   ├── america.dic
            │   │   ├── china.dic
            │   │   ├── europe.dic
            │   │   ├── japan.dic
            │   │   ├── korea.dic
            │   │   ├── oceania.dic
            │   │   ├── readme.txt
            │   │   └── taiwan.dic
            │   ├── festival.dic
            │   ├── language.dic
            │   ├── locale
            │   │   ├── beijing.dic
            │   │   ├── fuzhou.dic
            │   │   ├── quanzhou.dic
            │   │   ├── readme.txt
            │   │   └── xiamen.dic
            │   ├── name-foreign.dic
            │   ├── nation.dic
            │   ├── org-domestic.dic
            │   ├── org-foreign.dic
            │   ├── paoding-dic-names.properties
            │   ├── star-domestic.dic
            │   ├── star-foreign.dic
            │   ├── t-base.dic
            │   ├── x-confucian-family-name.dic
            │   ├── x-for-combinatorics.dic
            │   ├── x-noise-charactor.dic
            │   ├── x-noise-word.dic
            │   └── x-unit.dic
            └── myInputFormat
            │   ├── JamesInputFormat.java
            │   ├── JamesRecordReader.java
            │   └── package-info.java
        └── week8
            └── mrclassify
                └── package-info.java


/README.md:
--------------------------------------------------------------------------------
1 | myhomework
2 | ==========
3 | 


--------------------------------------------------------------------------------
/com.homework/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry kind="src" path="src/week2"/>
16 | 	<classpathentry kind="src" path="src/week3"/>
17 | 	<classpathentry kind="src" path="src/week5"/>
18 | 	<classpathentry kind="src" path="src/mommon"/>
19 | 	<classpathentry kind="src" path="src/week6"/>
20 | 	<classpathentry kind="src" path="src/common"/>
21 | 	<classpathentry kind="src" path="src/week7"/>
22 | 	<classpathentry kind="src" path="src/week8"/>
23 | 	<classpathentry kind="src" path="src/sequence/machinelearning/clustering"/>
24 | 	<classpathentry kind="src" path="src/sequence/machinelearning/association"/>
25 | 	<classpathentry kind="src" path="src/hadoop/machinelearning/clustering"/>
26 | 	<classpathentry kind="src" path="src/sequence/machinelearning/decisiontree"/>
27 | 	<classpathentry kind="src" path="src/sequence/machinelearning/naivebayes"/>
28 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
29 | 		<attributes>
30 | 			<attribute name="maven.pomderived" value="true"/>
31 | 		</attributes>
32 | 	</classpathentry>
33 | 	<classpathentry kind="lib" path="lib/paoding-analysis.jar"/>
34 | 	<classpathentry kind="lib" path="lib/je-analysis-1.5.1.jar"/>
35 | 	<classpathentry kind="lib" path="lib/lucene-core-2.3.0.jar"/>
36 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
37 | 		<attributes>
38 | 			<attribute name="maven.pomderived" value="true"/>
39 | 		</attributes>
40 | 	</classpathentry>
41 | 	<classpathentry kind="output" path="target/classes"/>
42 | </classpath>
43 | 


--------------------------------------------------------------------------------
/com.homework/.classpath.bak:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 | 	<classpathentry kind="src" output="target/classes" path="src/main/java">
 4 | 		<attributes>
 5 | 			<attribute name="optional" value="true"/>
 6 | 			<attribute name="maven.pomderived" value="true"/>
 7 | 		</attributes>
 8 | 	</classpathentry>
 9 | 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
10 | 		<attributes>
11 | 			<attribute name="optional" value="true"/>
12 | 			<attribute name="maven.pomderived" value="true"/>
13 | 		</attributes>
14 | 	</classpathentry>
15 | 	<classpathentry kind="src" path="src/week2"/>
16 | 	<classpathentry kind="src" path="src/week3"/>
17 | 	<classpathentry kind="src" path="src/week5"/>
18 | 	<classpathentry kind="src" path="src/mommon"/>
19 | 	<classpathentry kind="src" path="src/week6"/>
20 | 	<classpathentry kind="src" path="src/common"/>
21 | 	<classpathentry kind="src" path="src/week7"/>
22 | 	<classpathentry kind="src" path="src/week8"/>
23 | 	<classpathentry kind="src" path="src/sequence/machinelearning/clustering"/>
24 | 	<classpathentry kind="src" path="src/sequence/machinelearning/association"/>
25 | 	<classpathentry kind="src" path="src/hadoop/machinelearning/clustering"/>
26 | 	<classpathentry kind="src" path="src/sequence/machinelearning/decisiontree"/>
27 | 	<classpathentry kind="src" path="src/sequence/machinelearning/naivebayes"/>
28 | 	<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
29 | 		<attributes>
30 | 			<attribute name="maven.pomderived" value="true"/>
31 | 		</attributes>
32 | 	</classpathentry>
33 | 	<classpathentry kind="lib" path="lib/paoding-analysis.jar"/>
34 | 	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
35 | 		<attributes>
36 | 			<attribute name="maven.pomderived" value="true"/>
37 | 		</attributes>
38 | 	</classpathentry>
39 | 	<classpathentry kind="lib" path="lib/commons-logging.jar"/>
40 | 	<classpathentry kind="lib" path="lib/junit.jar"/>
41 | 	<classpathentry kind="lib" path="lib/lucene-analyzers-2.2.0.jar"/>
42 | 	<classpathentry kind="lib" path="lib/lucene-core-2.2.0.jar"/>
43 | 	<classpathentry kind="lib" path="lib/lucene-highlighter-2.2.0.jar"/>
44 | 	<classpathentry kind="output" path="target/classes"/>
45 | </classpath>
46 | 


--------------------------------------------------------------------------------
/com.homework/.gitignore:
--------------------------------------------------------------------------------
 1 | /target/
 2 | /target/classes/META-INF/maven/com/com.homework/pom.properties
 3 | .project
 4 | .settings
 5 | target
 6 | *.log
 7 | data
 8 | build
 9 | bin
10 | assets
11 | runtime
12 | *.class
13 | *.war
14 | *.ear
15 | input
16 | output
17 | 
18 | 


--------------------------------------------------------------------------------
/com.homework/.gitignore.bak:
--------------------------------------------------------------------------------
 1 | /target/
 2 | /target/classes/META-INF/maven/com/com.homework/pom.properties
 3 | 
 4 | .project
 5 | 
 6 | .settings
 7 | target
 8 | *.log
 9 | data
10 | build
11 | bin
12 | assets
13 | runtime
14 | *.class
15 | *.war
16 | *.ear
17 | input
18 | output
19 | 
20 | 


--------------------------------------------------------------------------------
/com.homework/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>com.homework</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 		<buildCommand>
 9 | 			<name>org.eclipse.jdt.core.javabuilder</name>
10 | 			<arguments>
11 | 			</arguments>
12 | 		</buildCommand>
13 | 		<buildCommand>
14 | 			<name>org.eclipse.m2e.core.maven2Builder</name>
15 | 			<arguments>
16 | 			</arguments>
17 | 		</buildCommand>
18 | 	</buildSpec>
19 | 	<natures>
20 | 		<nature>org.eclipse.jdt.core.javanature</nature>
21 | 		<nature>org.eclipse.m2e.core.maven2Nature</nature>
22 | 	</natures>
23 | </projectDescription>
24 | 


--------------------------------------------------------------------------------
/com.homework/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding//scripts/fp-growth/fpg-mahout.txt=UTF-8
3 | encoding//src/main/java=UTF-8
4 | encoding//src/test/java=UTF-8
5 | encoding/<project>=UTF-8
6 | 


--------------------------------------------------------------------------------
/com.homework/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
 1 | eclipse.preferences.version=1
 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
 4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
 5 | org.eclipse.jdt.core.compiler.compliance=1.7
 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
12 | org.eclipse.jdt.core.compiler.source=1.7
13 | 


--------------------------------------------------------------------------------
/com.homework/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 | 


--------------------------------------------------------------------------------
/com.homework/datafile/association/cnItems.dat:
--------------------------------------------------------------------------------
1 | 1	牛奶,鸡蛋,面包,薯片
2 | 2	鸡蛋,爆米花,薯片,啤酒
3 | 3	鸡蛋,面包,薯片
4 | 4	牛奶,鸡蛋,面包,爆米花,薯片,啤酒
5 | 5	牛奶,面包,啤酒
6 | 6	鸡蛋,面包,啤酒
7 | 7	牛奶,面包,薯片
8 | 8	牛奶,鸡蛋,面包,黄油,薯片
9 | 9	牛奶,鸡蛋,黄油,薯片


--------------------------------------------------------------------------------
/com.homework/datafile/association/fpg:
--------------------------------------------------------------------------------
1 | 牛奶,鸡蛋,面包,薯片
2 | 鸡蛋,爆米花,薯片,啤酒
3 | 鸡蛋,面包,薯片
4 | 牛奶,鸡蛋,面包,爆米花,薯片,啤酒
5 | 牛奶,面包,啤酒
6 | 鸡蛋,面包,啤酒
7 | 牛奶,面包,薯片
8 | 牛奶,鸡蛋,面包,黄油,薯片
9 | 牛奶,鸡蛋,黄油,薯片


--------------------------------------------------------------------------------
/com.homework/datafile/association/fpg2:
--------------------------------------------------------------------------------
1 | I1,I2,I5
2 | I2,I4
3 | I2,I3
4 | I1,I2,I4 
5 | I1,I3 
6 | I2,I3
7 | I1,I3
8 | I1,I2,I3,I5
9 | I1,I2,I3


--------------------------------------------------------------------------------
/com.homework/datafile/association/items:
--------------------------------------------------------------------------------
1 | T100	I1,I2,I5
2 | T200	I2,I4
3 | T300	I2,I3
4 | T400	I1,I2,I4
5 | T500	I1,I3
6 | T600	I2,I3
7 | T700	I1,I3
8 | T800	I1,I2,I3,I5
9 | T900	I1,I2,I3


--------------------------------------------------------------------------------
/com.homework/datafile/cluster/simple_k-means.txt:
--------------------------------------------------------------------------------
1 | 1 1
2 | 2 1
3 | 1 2
4 | 2 2
5 | 3 3
6 | 8 8
7 | 8 9
8 | 9 8
9 | 9 9


--------------------------------------------------------------------------------
/com.homework/datafile/decisiontree/test/in/weather.nominal.arff:
--------------------------------------------------------------------------------
 1 | 
 2 | #存放做决策的属性，一般是或否
 3 | @decision
 4 | yes,no
 5 | 
 6 | @attribute outlook {sunny, overcast, rainy}
 7 | @attribute temperature {hot, mild, cool}
 8 | @attribute humidity {high, normal}
 9 | @attribute windy {TRUE, FALSE}
10 | 
11 |  
12 | @data
13 | sunny,hot,high,FALSE,no
14 | sunny,hot,high,TRUE,no
15 | overcast,hot,high,FALSE,yes
16 | rainy,mild,high,FALSE,yes
17 | rainy,cool,normal,FALSE,yes
18 | rainy,cool,normal,TRUE,no
19 | overcast,cool,normal,TRUE,yes
20 | sunny,mild,high,FALSE,no
21 | sunny,cool,normal,FALSE,yes
22 | rainy,mild,normal,FALSE,yes
23 | sunny,mild,normal,TRUE,yes
24 | overcast,mild,high,TRUE,yes
25 | overcast,hot,normal,FALSE,yes
26 | rainy,mild,high,TRUE,no


--------------------------------------------------------------------------------
/com.homework/datafile/decisiontree/train/in/weather.nominal.arff:
--------------------------------------------------------------------------------
 1 | 
 2 | #存放做决策的属性，一般是或否
 3 | @decision
 4 | yes,no
 5 | 
 6 | @attribute outlook {sunny, overcast, rainy}
 7 | @attribute temperature {hot, mild, cool}
 8 | @attribute humidity {high, normal}
 9 | @attribute windy {TRUE, FALSE}
10 | 
11 |  
12 | @data
13 | sunny,hot,high,FALSE,no
14 | sunny,hot,high,TRUE,no
15 | overcast,hot,high,FALSE,yes
16 | rainy,mild,high,FALSE,yes
17 | rainy,cool,normal,FALSE,yes
18 | rainy,cool,normal,TRUE,no
19 | overcast,cool,normal,TRUE,yes
20 | sunny,mild,high,FALSE,no
21 | sunny,cool,normal,FALSE,yes
22 | rainy,mild,normal,FALSE,yes
23 | sunny,mild,normal,TRUE,yes
24 | overcast,mild,high,TRUE,yes
25 | overcast,hot,normal,FALSE,yes
26 | rainy,mild,high,TRUE,no


--------------------------------------------------------------------------------
/com.homework/datafile/naivebayes/test/in/test.arff:
--------------------------------------------------------------------------------
 1 | @decision
 2 | yes,no
 3 | @attribute outlook {sunny, overcast, rainy}
 4 | @attribute temperature {hot, mild, cool}
 5 | @attribute humidity {high, normal}
 6 | @attribute windy {TRUE, FALSE}
 7 | @data
 8 | sunny,hot,high,FALSE
 9 | overcast,mild,high,TRUE
10 | overcast,hot,normal,FALSE
11 | rainy,mild,high,TRUE


--------------------------------------------------------------------------------
/com.homework/datafile/naivebayes/train/in/weather.nominal.arff:
--------------------------------------------------------------------------------
 1 | #存放做决策的属性，一般是或否
 2 | @decision
 3 | yes,no
 4 | @attribute outlook {sunny, overcast, rainy}
 5 | @attribute temperature {hot, mild, cool}
 6 | @attribute humidity {high, normal}
 7 | @attribute windy {TRUE, FALSE}
 8 | @data
 9 | sunny,hot,high,FALSE,no
10 | sunny,hot,high,TRUE,no
11 | overcast,hot,high,FALSE,yes
12 | rainy,mild,high,FALSE,yes
13 | rainy,cool,normal,FALSE,yes
14 | rainy,cool,normal,TRUE,no
15 | overcast,cool,normal,TRUE,yes
16 | sunny,mild,high,FALSE,no
17 | sunny,cool,normal,FALSE,yes
18 | rainy,mild,normal,FALSE,yes
19 | sunny,mild,normal,TRUE,yes
20 | overcast,mild,high,TRUE,yes
21 | overcast,hot,normal,FALSE,yes
22 | rainy,mild,high,TRUE,no


--------------------------------------------------------------------------------
/com.homework/datafile/naivebayes/train/out/trainresult.arff:
--------------------------------------------------------------------------------
 1 | @decision P(yes) {0.7142857142857143}
 2 | @decision P(no) {0.42857142857142855}
 3 | @data
 4 | P(outlook=sunny|yes),0.3
 5 | P(outlook=sunny|no),0.6666666666666666
 6 | P(outlook=overcast|yes),0.5
 7 | P(outlook=overcast|no),0.16666666666666666
 8 | P(outlook=rainy|yes),0.4
 9 | P(outlook=rainy|no),0.5
10 | P(temperature=hot|yes),0.3
11 | P(temperature=hot|no),0.5
12 | P(temperature=mild|yes),0.5
13 | P(temperature=mild|no),0.5
14 | P(temperature=cool|yes),0.4
15 | P(temperature=cool|no),0.3333333333333333
16 | P(humidity=high|yes),0.4
17 | P(humidity=high|no),0.8333333333333334
18 | P(humidity=normal|yes),0.7
19 | P(humidity=normal|no),0.3333333333333333
20 | P(windy=TRUE|yes),0.4
21 | P(windy=TRUE|no),0.6666666666666666
22 | P(windy=FALSE|yes),0.7
23 | P(windy=FALSE|no),0.5
24 | 


--------------------------------------------------------------------------------
/com.homework/datafile/week5/Ma:
--------------------------------------------------------------------------------
1 | 1,1,1
2 | 2,1,2
3 | 2,2,3


--------------------------------------------------------------------------------
/com.homework/datafile/week5/Mb:
--------------------------------------------------------------------------------
1 | 1,1,2
2 | 1,2,4
3 | 2,1,1
4 | 2,2,2


--------------------------------------------------------------------------------
/com.homework/datafile/week5/SparseMatrix/a.txt:
--------------------------------------------------------------------------------
 1 | 1,1,1
 2 | 1,2,2
 3 | 1,3,3
 4 | 2,1,4
 5 | 2,2,5
 6 | 3,1,7
 7 | 3,2,8
 8 | 3,3,9
 9 | 4,1,10
10 | 4,2,11
11 | 4,3,12


--------------------------------------------------------------------------------
/com.homework/datafile/week5/SparseMatrix/b.txt:
--------------------------------------------------------------------------------
1 | 1,1,10
2 | 1,2,15
3 | 2,2,2
4 | 3,1,11
5 | 3,2,9


--------------------------------------------------------------------------------
/com.homework/datafile/week5/a.txt:
--------------------------------------------------------------------------------
 1 | 1,1,1
 2 | 1,2,2
 3 | 1,3,3
 4 | 2,1,4
 5 | 2,2,5
 6 | 3,1,7
 7 | 3,2,8
 8 | 3,3,9
 9 | 4,1,10
10 | 4,2,11
11 | 4,3,12


--------------------------------------------------------------------------------
/com.homework/datafile/week5/b.txt:
--------------------------------------------------------------------------------
1 | 1,1,10
2 | 1,2,15
3 | 2,2,2
4 | 3,1,11
5 | 3,2,9


--------------------------------------------------------------------------------
/com.homework/datafile/week5/small.csv:
--------------------------------------------------------------------------------
 1 | 1,101,5.0
 2 | 1,102,3.0
 3 | 1,103,2.5
 4 | 2,101,2.0
 5 | 2,102,2.5
 6 | 2,103,5.0
 7 | 2,104,2.0
 8 | 3,101,2.0
 9 | 3,104,4.0
10 | 3,105,4.5
11 | 3,107,5.0
12 | 4,101,5.0
13 | 4,103,3.0
14 | 4,104,4.5
15 | 4,106,4.0
16 | 5,101,4.0
17 | 5,102,3.0
18 | 5,103,2.0
19 | 5,104,4.0
20 | 5,105,3.5
21 | 5,106,4.0


--------------------------------------------------------------------------------
/com.homework/datafile/week5/small2.csv:
--------------------------------------------------------------------------------
 1 | 1,101,5.0
 2 | 1,102,3.0
 3 | 1,103,2.5
 4 | 2,101,2.0
 5 | 2,102,2.5
 6 | 2,103,5.0
 7 | 2,104,2.0
 8 | 3,101,2.0
 9 | 3,104,4.0
10 | 3,105,4.5
11 | 3,107,5.0
12 | 4,101,5.0
13 | 4,103,3.0
14 | 4,104,4.5
15 | 4,106,4.0
16 | 5,101,4.0
17 | 5,102,3.0
18 | 5,103,2.0
19 | 5,104,4.0
20 | 5,105,3.5
21 | 5,106,4.0
22 | 6,102,4.0
23 | 6,103,2.0
24 | 6,105,3.5
25 | 6,107,4.0


--------------------------------------------------------------------------------
/com.homework/datafile/week5/test/Ma:
--------------------------------------------------------------------------------
1 | 1,1,1
2 | 1,2,2
3 | 2,1,2
4 | 2,2,3


--------------------------------------------------------------------------------
/com.homework/datafile/week5/test/Mb:
--------------------------------------------------------------------------------
1 | 1,1,2
2 | 1,2,4
3 | 2,1,1
4 | 2,2,2


--------------------------------------------------------------------------------
/com.homework/datafile/week6/job.csv:
--------------------------------------------------------------------------------
  1 | 1,2013-01-24,5600
  2 | 2,2011-03-02,5400
  3 | 3,2011-03-14,8100
  4 | 4,2012-10-05,2200
  5 | 5,2011-09-03,14100
  6 | 6,2011-03-05,6500
  7 | 7,2012-06-06,37000
  8 | 8,2013-02-18,5500
  9 | 9,2010-07-05,7500
 10 | 10,2010-01-23,6700
 11 | 11,2011-09-19,5200
 12 | 12,2010-01-19,29700
 13 | 13,2013-09-28,6000
 14 | 14,2013-10-23,3300
 15 | 15,2010-10-09,2700
 16 | 16,2010-07-14,5100
 17 | 17,2010-05-13,29000
 18 | 18,2010-01-16,21800
 19 | 19,2013-05-23,5700
 20 | 20,2011-04-24,5900
 21 | 21,2011-09-07,4500
 22 | 22,2011-02-20,8100
 23 | 23,2012-10-15,6300
 24 | 24,2010-04-16,15500
 25 | 25,2011-08-22,6300
 26 | 26,2011-08-10,8800
 27 | 27,2010-09-01,7700
 28 | 28,2013-10-16,4300
 29 | 29,2010-03-04,8100
 30 | 30,2010-05-01,9200
 31 | 31,2011-04-16,7700
 32 | 32,2013-09-04,2300
 33 | 33,2010-05-26,17400
 34 | 34,2011-04-14,4000
 35 | 35,2010-09-29,5700
 36 | 36,2010-04-11,2800
 37 | 37,2010-07-26,3600
 38 | 38,2011-05-04,17200
 39 | 39,2013-04-03,6000
 40 | 40,2011-10-21,8400
 41 | 41,2010-01-11,5600
 42 | 42,2012-03-17,6400
 43 | 43,2010-07-10,8800
 44 | 44,2010-09-22,22100
 45 | 45,2012-08-31,4000
 46 | 46,2011-06-11,8800
 47 | 47,2010-03-08,5400
 48 | 48,2010-04-29,8300
 49 | 49,2011-02-05,14500
 50 | 50,2011-10-24,7500
 51 | 51,2011-04-17,7400
 52 | 52,2011-03-19,4000
 53 | 53,2010-07-02,5300
 54 | 54,2010-07-21,15700
 55 | 55,2013-08-09,2800
 56 | 56,2013-01-14,48900
 57 | 57,2011-06-14,4100
 58 | 58,2010-07-30,12300
 59 | 59,2010-05-13,9100
 60 | 60,2013-06-19,7600
 61 | 61,2010-03-13,9700
 62 | 62,2013-10-15,5000
 63 | 63,2012-10-02,4900
 64 | 64,2010-06-08,6300
 65 | 65,2010-08-02,3300
 66 | 66,2010-05-03,8600
 67 | 67,2013-08-23,11300
 68 | 68,2010-10-03,7300
 69 | 69,2010-05-23,5200
 70 | 70,2010-03-28,26400
 71 | 71,2010-02-05,9300
 72 | 72,2010-06-18,6900
 73 | 73,2013-07-08,7500
 74 | 74,2010-04-04,6600
 75 | 75,2011-05-27,8700
 76 | 76,2011-03-17,8800
 77 | 77,2013-03-03,6500
 78 | 78,2012-01-29,6800
 79 | 79,2010-07-19,4900
 80 | 80,2010-01-13,5600
 81 | 81,2013-01-22,7800
 82 | 82,2010-07-05,7500
 83 | 83,2010-04-17,3200
 84 | 84,2010-10-13,16100
 85 | 85,2010-06-26,5400
 86 | 86,2011-07-04,7500
 87 | 87,2010-05-29,2100
 88 | 88,2012-02-04,6500
 89 | 89,2013-06-15,8400
 90 | 90,2010-01-04,3600
 91 | 91,2010-09-07,6900
 92 | 92,2012-05-19,5700
 93 | 93,2010-08-13,15300
 94 | 94,2011-05-11,15700
 95 | 95,2013-09-23,6100
 96 | 96,2011-05-27,14900
 97 | 97,2010-03-30,2700
 98 | 98,2010-01-15,2900
 99 | 99,2013-07-21,12900
100 | 100,2010-07-22,7500
101 | 101,2013-03-10,7100
102 | 102,2010-07-04,9500
103 | 103,2010-01-02,7000
104 | 104,2012-05-02,8700
105 | 105,2013-04-28,8000
106 | 106,2011-04-25,5200
107 | 107,2010-10-23,9200
108 | 108,2010-07-21,5900
109 | 109,2010-07-14,8900
110 | 110,2010-09-10,3400
111 | 111,2012-05-05,6400
112 | 112,2010-10-16,2000
113 | 113,2013-03-31,8200
114 | 114,2013-08-01,8300
115 | 115,2010-04-23,5100
116 | 116,2011-10-16,6100
117 | 117,2010-03-01,3100
118 | 118,2010-06-23,4100
119 | 119,2011-10-17,14400
120 | 120,2013-07-10,3200
121 | 121,2010-06-19,5300
122 | 122,2013-04-25,9100
123 | 123,2010-06-22,3900
124 | 124,2013-09-14,7900
125 | 125,2010-03-08,5100
126 | 126,2010-01-06,8500
127 | 127,2010-08-16,5800
128 | 128,2010-05-27,12800
129 | 129,2010-03-01,14900
130 | 130,2010-08-16,9500
131 | 131,2010-01-24,5400
132 | 132,2010-05-10,6000
133 | 133,2011-01-31,3200
134 | 134,2010-08-12,4300
135 | 135,2012-09-01,6900
136 | 136,2010-08-29,6600
137 | 137,2010-01-20,7400
138 | 138,2012-02-23,4800
139 | 139,2012-09-26,8700
140 | 140,2010-02-23,9100
141 | 141,2011-10-05,5200
142 | 142,2010-04-18,44500
143 | 143,2010-06-28,10800
144 | 144,2010-09-18,12600
145 | 145,2013-08-02,6800
146 | 146,2013-09-28,8500
147 | 147,2011-09-20,19900
148 | 148,2012-09-02,9200
149 | 149,2010-03-19,11200
150 | 150,2012-01-14,3700
151 | 151,2013-02-21,6400
152 | 152,2012-09-28,7500
153 | 153,2010-05-02,5400
154 | 154,2010-03-19,17700
155 | 155,2010-10-13,2700
156 | 156,2010-09-19,9400
157 | 157,2011-08-26,10500
158 | 158,2011-08-29,9800
159 | 159,2011-02-22,18200
160 | 160,2010-03-14,5100
161 | 161,2010-08-23,6900
162 | 162,2010-01-28,11700
163 | 163,2013-07-02,6600
164 | 164,2011-09-22,6700
165 | 165,2010-07-06,7800
166 | 166,2010-01-25,8900
167 | 167,2013-06-02,9400
168 | 168,2013-01-13,2400
169 | 169,2011-03-02,2700
170 | 170,2013-02-24,5300
171 | 171,2010-10-09,5100
172 | 172,2010-09-07,6100
173 | 173,2013-09-13,5200
174 | 174,2013-05-09,4500
175 | 175,2013-09-12,36700
176 | 176,2012-05-04,8800
177 | 177,2010-08-17,12600
178 | 178,2011-08-16,8300
179 | 179,2010-08-11,5300
180 | 180,2010-04-28,8000
181 | 181,2010-04-24,6300
182 | 182,2010-03-01,10400
183 | 183,2010-05-20,6500
184 | 184,2010-01-03,4600
185 | 185,2013-09-21,5300
186 | 186,2010-04-22,7800
187 | 187,2010-08-08,6100
188 | 188,2010-07-14,6000
189 | 189,2011-06-19,6000
190 | 190,2010-01-10,12300
191 | 191,2011-07-27,2400
192 | 192,2012-02-14,12200
193 | 193,2010-02-28,2800
194 | 194,2011-10-14,14400
195 | 195,2012-03-12,3500
196 | 196,2010-04-11,3800
197 | 197,2013-03-13,18000
198 | 198,2010-07-20,41600
199 | 199,2013-10-02,9800
200 | 200,2013-02-05,7100
201 | 


--------------------------------------------------------------------------------
/com.homework/lib/je-analysis-1.5.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/lib/je-analysis-1.5.1.jar


--------------------------------------------------------------------------------
/com.homework/lib/lucene-core-2.3.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/lib/lucene-core-2.3.0.jar


--------------------------------------------------------------------------------
/com.homework/lib/lucene-core-3.1.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/lib/lucene-core-3.1.0.jar


--------------------------------------------------------------------------------
/com.homework/lib/paoding-analysis.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/lib/paoding-analysis.jar


--------------------------------------------------------------------------------
/com.homework/lib/说明:
--------------------------------------------------------------------------------
1 | paoding-analysis.jar只支持lucene-core-3.1.0.jar
2 | je-analysis-1.5.1.jar不支持lucene3.0以上的，所以
3 | 用paoding只能先lucene3.1
4 | 用je只能选lucene2.3
5 | 


--------------------------------------------------------------------------------
/com.homework/pom.xml:
--------------------------------------------------------------------------------
 1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 2 |   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 3 |   <modelVersion>4.0.0</modelVersion>
 4 | 
 5 |   <groupId>com</groupId>
 6 |   <artifactId>com.homework</artifactId>
 7 |   <version>0.0.1-SNAPSHOT</version>
 8 |   <packaging>jar</packaging>
 9 | 
10 |   <name>com.homework</name>
11 |   <url>http://maven.apache.org</url>
12 | 
13 |   <properties>
14 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
15 |     <mahout.version>0.8</mahout.version>
16 |   </properties>
17 | 
18 |   <dependencies>
19 |   <dependency>
20 |   		<groupId>org.apache.hadoop</groupId>
21 |   		<artifactId>hadoop-core</artifactId>
22 |   		<version>1.1.2</version>
23 |   	</dependency>
24 |   	<dependency>
25 | 			<groupId>org.apache.mahout</groupId>
26 | 			<artifactId>mahout-core</artifactId>
27 | 			<version>${mahout.version}</version>
28 | 	</dependency>
29 | 			<dependency>
30 | 			<groupId>org.apache.mahout</groupId>
31 | 			<artifactId>mahout-integration</artifactId>
32 | 			<version>${mahout.version}</version>
33 | 			<exclusions>
34 | 				<exclusion>
35 | 					<groupId>org.mortbay.jetty</groupId>
36 | 					<artifactId>jetty</artifactId>
37 | 				</exclusion>
38 | 				<exclusion>
39 | 					<groupId>org.apache.cassandra</groupId>
40 | 					<artifactId>cassandra-all</artifactId>
41 | 				</exclusion>
42 | 				<exclusion>
43 | 					<groupId>me.prettyprint</groupId>
44 | 					<artifactId>hector-core</artifactId>
45 | 				</exclusion>
46 | 			</exclusions>
47 | 		</dependency>
48 |   	<dependency>
49 |      <groupId>org.apache.hive</groupId>
50 |       <artifactId>hive-service</artifactId>
51 |       <version>0.11.0</version>
52 |     </dependency>
53 |     
54 |     
55 |     <dependency>
56 |       <groupId>junit</groupId>
57 |       <artifactId>junit</artifactId>
58 |       <version>3.8.1</version>
59 |       <scope>test</scope>
60 |     </dependency>
61 | <!--最终的决策树保存在了XML中，使用了Dom4J，注意如果要让Dom4J支持按XPath选择节点，还得引入包jaxen.jar-->
62 | <!--决策树引入的第三方包，主要操作XML文件用到 -->  
63 |     <dependency>
64 |     	<groupId>dom4j</groupId>
65 |     	<artifactId>dom4j</artifactId>
66 |     	<version>1.6.1</version>
67 |     </dependency>
68 |     <dependency>
69 | 	    <groupId>jaxen</groupId>
70 | 	    <artifactId>jaxen</artifactId>
71 | 	    <version>1.1.6</version>
72 |    </dependency>
73 | <!--决策树引入的第三方包，主要操作XML文件用到 -->
74 |  </dependencies>
75 | </project>
76 | 


--------------------------------------------------------------------------------
/com.homework/scripts/clustering/canopy/canopy-mahout.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | 数据准备：
 3 | canopy.dat文件
 4 | 8.1 8.1  
 5 | 
 6 | 7.1 7.1  
 7 | 
 8 | 6.2 6.2  
 9 | 
10 | 7.1 7.1  
11 | 
12 | 2.1 2.1  
13 | 
14 | 1.1 1.1  
15 | 
16 | 0.1 0.1  
17 | 
18 | 3.0 3.0
19 | 
20 | # 1.转换成向量，mahout用InputDriver数据转换时候，需要数据默认用空格分隔
21 | mahout org.apache.mahout.clustering.conversion.InputDriver -i /user/hdfs/canopy/in/canopy.dat -o /user/hdfs/canopy/vecfile -v org.apache.mahout.math.RandomAccessSparseVector
22 | # 2. 调用命令
23 | mahout canopy -i /user/hdfs/canopy/vecfile -o /user/hdfs/canopy/out/result -t1 8 -t2 4 -ow -cl 
24 | 
25 | 
26 | # 3.查看结果
27 | 
28 | mahout seqdumper -i /user/hdfs/canopy/out/result/clusters-0-final/part-r-00000  -o /home/hadoop/output/result
29 | #关联各个点
30 | mahout clusterdump -i /user/hdfs/canopy/out/result/clusters-0-final/part-r-00000  -o /home/hadoop/output/result -p /user/hdfs/canopy/out/result/clusteredPoints
31 | 
32 | 
33 | C-0{n=2 c=[6.888, 6.888] r=[0.237, 0.237]}
34 | 	Weight : [props - optional]:  Point:
35 | 	1.0: [8.100, 8.100]
36 | 	1.0: [7.100, 7.100]
37 | 	1.0: [6.200, 6.200]
38 | 	1.0: [7.100, 7.100]
39 | C-1{n=2 c=[1.083, 1.083] r=[0.983, 0.983]}
40 | 	Weight : [props - optional]:  Point:
41 | 	1.0: [2.100, 2.100]
42 | 	1.0: [1.100, 1.100]
43 | 	1.0: [3.000, 3.000]
44 | C-2{n=1 c=[0.100, 0.100] r=[]}
45 | 	Weight : [props - optional]:  Point:
46 | 	1.0: [0.100, 0.100]


--------------------------------------------------------------------------------
/com.homework/scripts/clustering/canopy/canopy.dat:
--------------------------------------------------------------------------------
1 | 8.1 8.1  
2 | 7.1 7.1  
3 | 6.2 6.2  
4 | 7.1 7.1  
5 | 2.1 2.1  
6 | 1.1 1.1  
7 | 0.1 0.1  
8 | 3.0 3.0


--------------------------------------------------------------------------------
/com.homework/scripts/fp-growth/fpg-mahout.txt:
--------------------------------------------------------------------------------
 1 | mahout fpg -i /user/hdfs/fp-growth/in/fpg.txt -o /user/hdfs/fp-growth/out -k 50 -method mapreduce -regex '[\ ]' -s 2
 2 | 13周作业
 3 | mahout fpg -i /user/hdfs/week13/user2items.csv -o /user/hdfs/week13/out -k 50 -method mapreduce -regex '[\ ]' -s 4
 4 | 查看结果
 5 | 
 6 | mahout seqdumper -i /user/hdfs/fp-growth/out/frequentpatterns/part-r-00000
 7 | 结果：
 8 | Key: I1: Value: ([I1],6), ([I2, I1],4), ([I1, I3],4), ([I2, I1, I5],2), ([I2, I1, I3],2)
 9 | Key: I2: Value: ([I2],7), ([I2, I3],4), ([I2, I1],4), ([I2, I1, I5],2), ([I2, I1, I3],2), ([I2, I4],2)
10 | Key: I3: Value: ([I3],6), ([I2, I3],4), ([I1, I3],4), ([I2, I1, I3],2)
11 | Key: I4: Value: ([I2, I4],2)
12 | Key: I5: Value: ([I2, I1, I5],2)
13 | Count: 5
14 | 查看fpgrowth
15 | mahout seqdumper -i /user/hdfs/fp-growth/out/fpgrowth/part-r-00000
16 | Key: I2: Value: ([I2],7)
17 | Key: I1: Value: ([I1],6), ([I2, I1],4)
18 | Key: I3: Value: ([I3],6), ([I2, I3],4), ([I1, I3],4), ([I2, I1, I3],2)
19 | Key: I4: Value: ([I2, I4],2)
20 | Key: I5: Value: ([I2, I1, I5],2)
21 | Count: 5
22 | 查看fList
23 | mahout seqdumper -i /user/hdfs/fp-growth/out/fList
24 | Key: I2: Value: 7
25 | Key: I1: Value: 6
26 | Key: I3: Value: 6
27 | Key: I4: Value: 2
28 | Key: I5: Value: 2
29 | Count: 5


--------------------------------------------------------------------------------
/com.homework/scripts/fp-growth/fpg.txt:
--------------------------------------------------------------------------------
1 | I1 I2 I5
2 | I2 I4
3 | I2 I3
4 | I1 I2 I4
5 | I1 I3
6 | I2 I3
7 | I1 I3
8 | I1 I2 I3 I5
9 | I1 I2 I3


--------------------------------------------------------------------------------
/com.homework/scripts/hive/HiveJDBC.java:
--------------------------------------------------------------------------------
 1 | package com.hive.jdbc;
 2 | import java.sql.Connection;
 3 | import java.sql.DriverManager;
 4 | import java.sql.ResultSet;
 5 | import java.sql.Statement;
 6 | 
 7 | public class HiveJDBC {
 8 | 
 9 | 	public static void main(String[] args) {
10 |         try {
11 |             Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
12 |             // 查询语句
13 |             String querySQL = "SELECT * FROM t_rp";
14 |             // 链接hive
15 |             Connection con = DriverManager.getConnection("jdbc:hive://192.168.0.100:10000/default", "hive", "hive");
16 |             Statement stmt = con.createStatement();
17 |             // 执行查询语句
18 |             ResultSet res = stmt.executeQuery(querySQL);
19 |             while (res.next()) {
20 |                 System.out.println("Result: key:" + res.getString(1) + "  –>  value:" + res.getString(2));
21 |             }
22 |             stmt.close();
23 |             con.close();
24 |         } catch (Exception e) {
25 |             e.printStackTrace();
26 |         }
27 |     }
28 | 
29 | 
30 | }
31 | /*
32 | import java.sql.SQLException;
33 | import java.sql.Connection;
34 | import java.sql.ResultSet;
35 | import java.sql.Statement;
36 | import java.sql.DriverManager;
37 |  
38 | public class HiveJdbcClient {
39 |   private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";
40 |  
41 |   *//**
42 |  * @param args
43 |  * @throws SQLException
44 |    *//*
45 |   public static void main(String[] args) throws SQLException {
46 |       try {
47 |       Class.forName(driverName);
48 |     } catch (ClassNotFoundException e) {
49 |       // TODO Auto-generated catch block
50 |       e.printStackTrace();
51 |       System.exit(1);
52 |     }
53 |     Connection con = DriverManager.getConnection("jdbc:hive://localhost:10000/default", "", "");
54 |     Statement stmt = con.createStatement();
55 |     String tableName = "testHiveDriverTable";
56 |     stmt.executeQuery("drop table " + tableName);
57 |     ResultSet res = stmt.executeQuery("create table " + tableName + " (key int, value string)");
58 |     // show tables
59 |     String sql = "show tables '" + tableName + "'";
60 |     System.out.println("Running: " + sql);
61 |     res = stmt.executeQuery(sql);
62 |     if (res.next()) {
63 |       System.out.println(res.getString(1));
64 |     }
65 |     // describe table
66 |     sql = "describe " + tableName;
67 |     System.out.println("Running: " + sql);
68 |     res = stmt.executeQuery(sql);
69 |     while (res.next()) {
70 |       System.out.println(res.getString(1) + "\t" + res.getString(2));
71 |     }
72 |  
73 |     // load data into table
74 |     // NOTE: filepath has to be local to the hive server
75 |     // NOTE: /tmp/a.txt is a ctrl-A separated file with two fields per line
76 |     String filepath = "/tmp/a.txt";
77 |     sql = "load data local inpath '" + filepath + "' into table " + tableName;
78 |     System.out.println("Running: " + sql);
79 |     res = stmt.executeQuery(sql);
80 |  
81 |     // select * query
82 |     sql = "select * from " + tableName;
83 |     System.out.println("Running: " + sql);
84 |     res = stmt.executeQuery(sql);
85 |     while (res.next()) {
86 |       System.out.println(String.valueOf(res.getInt(1)) + "\t" + res.getString(2));
87 |     }
88 |  
89 |     // regular hive query
90 |     sql = "select count(1) from " + tableName;
91 |     System.out.println("Running: " + sql);
92 |     res = stmt.executeQuery(sql);
93 |     while (res.next()) {
94 |       System.out.println(res.getString(1));
95 |     }
96 |   }
97 | }*/


--------------------------------------------------------------------------------
/com.homework/scripts/week10/1.pig:
--------------------------------------------------------------------------------
 1 | #计算1的好友推荐
 2 | -- Dataguru Hadoop Course
 3 | -- Code by James 
 4 | 
 5 | -- Load Data
 6 | data1 = LOAD '/user/hdfs/week10/karate.csv' AS ( source, target );
 7 | 
 8 | data2 = LOAD '/user/hdfs/week10/karate.csv' AS ( source, target );
 9 | 
10 | -- Mine the common friends
11 | common_jnd = JOIN data1 BY target, data2 BY target;
12 | 
13 | common_prj = FOREACH common_jnd GENERATE data1::target AS common_friend, data1::source AS user, data2::source AS candidate;
14 | 
15 | common_flt = FILTER common_prj BY user != candidate;
16 | common_grp = GROUP common_flt BY (user,candidate);-- 此句测试用
17 | common = FOREACH ( GROUP common_flt BY (user,candidate) ) GENERATE FLATTEN(group) AS (user,candidate), COUNT(common_flt) AS cnt;
18 | 
19 | -- Recommendation
20 | user = FOREACH ( GROUP common BY user ) 
21 | {
22 |     candidate_srt = ORDER common BY cnt DESC;
23 |     candidate_lim = LIMIT candidate_srt 5;
24 |     GENERATE FLATTEN(candidate_lim) AS ( user, candidate, cnt );
25 | }
26 | 
27 | STORE user INTO '/user/hdfs/week10/result_1';


--------------------------------------------------------------------------------
/com.homework/scripts/week10/common_friend.pig:
--------------------------------------------------------------------------------
 1 | -- Dataguru Hadoop Course
 2 | -- Code by James 
 3 | 
 4 | -- Load Data
 5 | data1 = LOAD '/user/huangjun/dataguru/wiki-Vote' AS ( source, target );
 6 | 
 7 | data2 = LOAD '/user/huangjun/dataguru/wiki-Vote' AS ( source, target );
 8 | 
 9 | -- Mine the common friends
10 | common_jnd = JOIN data1 BY target, data2 BY target;
11 | 
12 | common_prj = FOREACH common_jnd GENERATE data1::target AS common_friend, data1::source AS user, data2::source AS candidate;
13 | 
14 | common_flt = FILTER common_prj BY user != candidate;
15 | 
16 | common = FOREACH ( GROUP common_flt BY (user,candidate) ) GENERATE FLATTEN(group) AS (user,candidate), COUNT(common_flt) AS cnt;
17 | 
18 | -- Recommendation
19 | user = FOREACH ( GROUP common BY user ) 
20 | {
21 |     candidate_srt = ORDER common BY cnt DESC;
22 |     candidate_lim = LIMIT candidate_srt 5;
23 |     GENERATE FLATTEN(candidate_lim) AS ( user, candidate, cnt );
24 | }
25 | 
26 | STORE user INTO '/user/huangjun/dataguru/result';


--------------------------------------------------------------------------------
/com.homework/scripts/week10/karate.csv:
--------------------------------------------------------------------------------
 1 | Source	Target
 2 | 2	1
 3 | 3	1
 4 | 3	2
 5 | 4	1
 6 | 4	2
 7 | 4	3
 8 | 5	1
 9 | 6	1
10 | 7	1
11 | 7	5
12 | 7	6
13 | 8	1
14 | 8	2
15 | 8	3
16 | 8	4
17 | 9	1
18 | 9	3
19 | 10	3
20 | 11	1
21 | 11	5
22 | 11	6
23 | 12	1
24 | 13	1
25 | 13	4
26 | 14	1
27 | 14	2
28 | 14	3
29 | 14	4
30 | 17	6
31 | 17	7
32 | 18	1
33 | 18	2
34 | 20	1
35 | 20	2
36 | 22	1
37 | 22	2
38 | 26	24
39 | 26	25
40 | 28	3
41 | 28	24
42 | 28	25
43 | 29	3
44 | 30	24
45 | 30	27
46 | 31	2
47 | 31	9
48 | 32	1
49 | 32	25
50 | 32	26
51 | 32	29
52 | 33	3
53 | 33	9
54 | 33	15
55 | 33	16
56 | 33	19
57 | 33	21
58 | 33	23
59 | 33	24
60 | 33	30
61 | 33	31
62 | 33	32
63 | 34	9
64 | 34	10
65 | 34	14
66 | 34	15
67 | 34	16
68 | 34	19
69 | 34	20
70 | 34	21
71 | 34	23
72 | 34	24
73 | 34	27
74 | 34	28
75 | 34	29
76 | 34	30
77 | 34	31
78 | 34	32
79 | 34	33


--------------------------------------------------------------------------------
/com.homework/scripts/week10/w10.pig:
--------------------------------------------------------------------------------
 1 | -- Dataguru Hadoop Course
 2 | -- Code by James 
 3 | 
 4 | -- Load Data
 5 | data1 = LOAD '/user/hdfs/week10/noway' AS ( source, target );
 6 | 
 7 | data2 = LOAD '/user/hdfs/week10/noway' AS ( source, target );
 8 | 
 9 | -- Mine the common friends
10 | common_jnd = JOIN data1 BY target, data2 BY target;
11 | 
12 | common_prj = FOREACH common_jnd GENERATE data1::target AS common_friend, data1::source AS user, data2::source AS candidate;
13 | 
14 | common_flt = FILTER common_prj BY user != candidate;
15 | -- common_grp = GROUP common_flt BY (user,candidate);-- 此句测试用
16 | common = FOREACH ( GROUP common_flt BY (user,candidate) ) GENERATE FLATTEN(group) AS (user,candidate), COUNT(common_flt) AS cnt;
17 | 
18 | -- Recommendation
19 | user = FOREACH ( GROUP common BY user ) 
20 | {
21 |     candidate_srt = ORDER common BY cnt DESC;
22 |     candidate_lim = LIMIT candidate_srt 5;
23 |     GENERATE FLATTEN(candidate_lim) AS ( user, candidate, cnt );
24 | }
25 | 
26 | STORE user INTO '/user/hdfs/week10/noway_out/';


--------------------------------------------------------------------------------
/com.homework/scripts/week10/杂文件/karate2.csv:
--------------------------------------------------------------------------------
 1 | Source	Target
 2 | 1	2
 3 | 2	1
 4 | 3	1
 5 | 3	2
 6 | 4	1
 7 | 4	2
 8 | 4	3
 9 | 5	1
10 | 6	1
11 | 7	1
12 | 7	5
13 | 7	6
14 | 8	1
15 | 8	2
16 | 8	3
17 | 8	4
18 | 9	1
19 | 9	3
20 | 10	3
21 | 11	1
22 | 11	5
23 | 11	6
24 | 12	1
25 | 13	1
26 | 13	4
27 | 14	1
28 | 14	2
29 | 14	3
30 | 14	4
31 | 17	6
32 | 17	7
33 | 18	1
34 | 18	2
35 | 20	1
36 | 20	2
37 | 22	1
38 | 22	2
39 | 26	24
40 | 26	25
41 | 28	3
42 | 28	24
43 | 28	25
44 | 29	3
45 | 30	24
46 | 30	27
47 | 31	2
48 | 31	9
49 | 32	1
50 | 32	25
51 | 32	26
52 | 32	29
53 | 33	3
54 | 33	9
55 | 33	15
56 | 33	16
57 | 33	19
58 | 33	21
59 | 33	23
60 | 33	24
61 | 33	30
62 | 33	31
63 | 33	32
64 | 34	9
65 | 34	10
66 | 34	14
67 | 34	15
68 | 34	16
69 | 34	19
70 | 34	20
71 | 34	21
72 | 34	23
73 | 34	24
74 | 34	27
75 | 34	28
76 | 34	29
77 | 34	30
78 | 34	31
79 | 34	32
80 | 34	33


--------------------------------------------------------------------------------
/com.homework/scripts/week10/杂文件/karate2.csv.bak:
--------------------------------------------------------------------------------
 1 | Source	Target
 2 | 2	1
 3 | 3	1
 4 | 3	2
 5 | 4	1
 6 | 4	2
 7 | 4	3
 8 | 5	1
 9 | 6	1
10 | 7	1
11 | 7	5
12 | 7	6
13 | 8	1
14 | 8	2
15 | 8	3
16 | 8	4
17 | 9	1
18 | 9	3
19 | 10	3
20 | 11	1
21 | 11	5
22 | 11	6
23 | 12	1
24 | 13	1
25 | 13	4
26 | 14	1
27 | 14	2
28 | 14	3
29 | 14	4
30 | 17	6
31 | 17	7
32 | 18	1
33 | 18	2
34 | 20	1
35 | 20	2
36 | 22	1
37 | 22	2
38 | 26	24
39 | 26	25
40 | 28	3
41 | 28	24
42 | 28	25
43 | 29	3
44 | 30	24
45 | 30	27
46 | 31	2
47 | 31	9
48 | 32	1
49 | 32	25
50 | 32	26
51 | 32	29
52 | 33	3
53 | 33	9
54 | 33	15
55 | 33	16
56 | 33	19
57 | 33	21
58 | 33	23
59 | 33	24
60 | 33	30
61 | 33	31
62 | 33	32
63 | 34	9
64 | 34	10
65 | 34	14
66 | 34	15
67 | 34	16
68 | 34	19
69 | 34	20
70 | 34	21
71 | 34	23
72 | 34	24
73 | 34	27
74 | 34	28
75 | 34	29
76 | 34	30
77 | 34	31
78 | 34	32
79 | 34	33


--------------------------------------------------------------------------------
/com.homework/scripts/week10/杂文件/mytest.txt:
--------------------------------------------------------------------------------
 1 | Source	Target
 2 | 1	3
 3 | 1	4
 4 | 2	3
 5 | 2	4
 6 | 2	1
 7 | 3	1
 8 | 3	2
 9 | 4	1
10 | 4	2
11 | 4	3


--------------------------------------------------------------------------------
/com.homework/scripts/week10/杂文件/noway:
--------------------------------------------------------------------------------
 1 | Source	Target
 2 | 1	3
 3 | 1	4
 4 | 2	3
 5 | 2	4
 6 | 3	1
 7 | 3	2
 8 | 4	1
 9 | 4	2
10 | 


--------------------------------------------------------------------------------
/com.homework/scripts/week10/杂文件/tes2.txt:
--------------------------------------------------------------------------------
1 | Source	Target
2 | 2	3
3 | 2	4
4 | 2	1
5 | 3	1
6 | 3	2
7 | 4	1
8 | 4	2
9 | 4	3


--------------------------------------------------------------------------------
/com.homework/scripts/week10/计算33的好友推荐(不关注别人的没有推荐)/common.java.bak:
--------------------------------------------------------------------------------
1 | class  
2 | {
3 | 	public static void main(String[] args) 
4 | 	{
5 | 		System.out.println("Hello World!");
6 | 	}
7 | }
8 | 


--------------------------------------------------------------------------------
/com.homework/scripts/week10/计算33的好友推荐(不关注别人的没有推荐)/pig.pig:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/scripts/week10/计算33的好友推荐(不关注别人的没有推荐)/pig.pig


--------------------------------------------------------------------------------
/com.homework/scripts/week10/计算33的好友推荐(不关注别人的没有推荐)/user.java:
--------------------------------------------------------------------------------
  1 | (10,33,1)
  2 | (10,29,1)
  3 | (10,28,1)
  4 | (10,14,1)
  5 | (10,9,1)
  6 | (11,7,3)
  7 | (11,12,1)
  8 | (11,8,1)
  9 | (11,6,1)
 10 | (11,5,1)
 11 | (12,4,1)
 12 | (12,2,1)
 13 | (12,3,1)
 14 | (12,32,1)
 15 | (12,22,1)
 16 | (13,8,2)
 17 | (13,14,2)
 18 | (13,22,1)
 19 | (13,18,1)
 20 | (13,12,1)
 21 | (14,8,4)
 22 | (14,4,3)
 23 | (14,22,2)
 24 | (14,20,2)
 25 | (14,18,2)
 26 | (17,11,1)
 27 | (17,7,1)
 28 | (18,8,2)
 29 | (18,4,2)
 30 | (18,3,2)
 31 | (18,22,2)
 32 | (18,20,2)
 33 | (2,3,1)
 34 | (2,4,1)
 35 | (2,5,1)
 36 | (2,6,1)
 37 | (2,7,1)
 38 | (20,3,2)
 39 | (20,22,2)
 40 | (20,18,2)
 41 | (20,14,2)
 42 | (20,8,2)
 43 | (22,8,2)
 44 | (22,4,2)
 45 | (22,3,2)
 46 | (22,20,2)
 47 | (22,14,2)
 48 | (26,28,2)
 49 | (26,34,1)
 50 | (26,30,1)
 51 | (26,32,1)
 52 | (26,33,1)
 53 | (28,33,2)
 54 | (28,26,2)
 55 | (28,29,1)
 56 | (28,14,1)
 57 | (28,10,1)
 58 | (29,8,1)
 59 | (29,4,1)
 60 | (29,10,1)
 61 | (29,33,1)
 62 | (29,14,1)
 63 | (3,20,2)
 64 | (3,18,2)
 65 | (3,14,2)
 66 | (3,4,2)
 67 | (3,8,2)
 68 | (30,34,2)
 69 | (30,33,1)
 70 | (30,28,1)
 71 | (30,26,1)
 72 | (31,18,1)
 73 | (31,22,1)
 74 | (31,33,1)
 75 | (31,34,1)
 76 | (31,20,1)
 77 | (32,11,1)
 78 | (32,13,1)
 79 | (32,14,1)
 80 | (32,18,1)
 81 | (32,20,1)
 82 | (33,34,10)
 83 | (33,28,2)
 84 | (33,29,1)
 85 | (33,4,1)
 86 | (33,8,1)
 87 | (34,33,10)
 88 | (34,30,2)
 89 | (34,28,1)
 90 | (34,26,1)
 91 | (34,32,1)
 92 | (4,8,3)
 93 | (4,14,3)
 94 | (4,18,2)
 95 | (4,20,2)
 96 | (4,22,2)
 97 | (5,7,1)
 98 | (5,9,1)
 99 | (5,11,1)
100 | (5,3,1)
101 | (5,13,1)
102 | (6,8,1)
103 | (6,9,1)
104 | (6,11,1)
105 | (6,12,1)
106 | (6,13,1)
107 | (7,11,3)
108 | (7,8,1)
109 | (7,2,1)
110 | (7,3,1)
111 | (7,4,1)
112 | (8,14,4)
113 | (8,4,3)
114 | (8,18,2)
115 | (8,20,2)
116 | (8,22,2)
117 | (9,8,2)
118 | (9,4,2)
119 | (9,14,2)
120 | (9,20,1)
121 | (9,3,1)


--------------------------------------------------------------------------------
/com.homework/scripts/week13/week13:
--------------------------------------------------------------------------------
 1 | 
 2 | 13周作业
 3 | mahout fpg -i /user/hdfs/week13/in/user2items2.csv -o /user/hdfs/week13/out -k 50 -method mapreduce -regex '[\ ]' -s 4
 4 | 查看结果
 5 | 
 6 | mahout seqdumper -i /user/hdfs/week13/out/frequentpatterns/part-r-00000
 7 | 结果：
 8 | Key: I1: Value: ([I1],6), ([I2, I1],4), ([I1, I3],4), ([I2, I1, I5],2), ([I2, I1, I3],2)
 9 | Key: I2: Value: ([I2],7), ([I2, I3],4), ([I2, I1],4), ([I2, I1, I5],2), ([I2, I1, I3],2), ([I2, I4],2)
10 | Key: I3: Value: ([I3],6), ([I2, I3],4), ([I1, I3],4), ([I2, I1, I3],2)
11 | Key: I4: Value: ([I2, I4],2)
12 | Key: I5: Value: ([I2, I1, I5],2)
13 | Count: 5
14 | 查看fpgrowth
15 | mahout seqdumper -i /user/hdfs/week13/out/fpgrowth/part-r-00000
16 | Key: I2: Value: ([I2],7)
17 | Key: I1: Value: ([I1],6), ([I2, I1],4)
18 | Key: I3: Value: ([I3],6), ([I2, I3],4), ([I1, I3],4), ([I2, I1, I3],2)
19 | Key: I4: Value: ([I2, I4],2)
20 | Key: I5: Value: ([I2, I1, I5],2)
21 | Count: 5
22 | 查看fList
23 | mahout seqdumper -i /user/hdfs/fp-growth/out/fList
24 | Key: I2: Value: 7
25 | Key: I1: Value: 6
26 | Key: I3: Value: 6
27 | Key: I4: Value: 2
28 | Key: I5: Value: 2
29 | Count: 5


--------------------------------------------------------------------------------
/com.homework/scripts/week8.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/scripts/week8.rar


--------------------------------------------------------------------------------
/com.homework/scripts/week8/homework.txt:
--------------------------------------------------------------------------------
 1 | --1.样本分词
 2 | hadoop jar /home/hadoop/in/tokenize.jar /user/hdfs/week8/homework/sport /user/hdfs/week8/homework/sport-out
 3 | 
 4 | --2.划分样本，80%训练集，20%测试集
 5 | processed= load '/user/hdfs/week8/homework/sport-out/part-r-00000' as (category:chararray,doc:chararray);
 6 | test = sample processed 0.2;
 7 | tfull= JOIN processed BY (category,doc) LEFT OUTER,test BY (category,doc);
 8 | t8=  filter tfull BY test::category is null;
 9 | train= foreach t8 generate processed::category as category,processed::doc as doc;
10 | store test into '/user/hdfs/week8/homework/test';
11 | store train into '/user/hdfs/week8/homework/train';
12 |   --查看划分结果
13 | test_count = foreach ( group test by category) generate group,COUNT(test.category);
14 | DUMP test_count;
15 | train_count = foreach ( group train by category) generate group,COUNT(train.category);
16 | DUMP train_count;
17 | 
18 | --3.训练学习集，及测试
19 | --mahout-0.6版,0.8不行
20 | --a.bayes
21 | mahout trainclassifier -i /user/hdfs/week8/homework/train -o /user/hdfs/week8/homework/model-bayes -type bayes -ng 1 -source hdfs
22 | mahout testclassifier -d /user/hdfs/week8/homework/test -m /user/hdfs/week8/homework/model-bayes -type bayes -ng 1 -source hdfs -method mapreduce
23 | --b.cbayes
24 | mahout trainclassifier -i /user/hdfs/week8/homework/train -o /user/hdfs/week8/homework/model-cbayes -type cbayes -ng 1 -source hdfs
25 | mahout testclassifier -d /user/hdfs/week8/homework/test -m /user/hdfs/week8/homework/model-cbayes -type cbayes -ng 1 -source hdfs -method mapreduce
26 | 
27 | --实战
28 | --分词
29 | hadoop jar /home/hadoop/in/tokenize.jar /user/hdfs/week8/homework/user-sport /user/hdfs/week8/homework/user-sport-out
30 | --运行：win7,eclipse下运行
31 | cbayes:
32 | hdfs://192.168.0.100:9000/user/hdfs/week8/homework/user-sport-out  hdfs://192.168.0.100:9000/user/hdfs/week8/homework/result_cbayes hdfs://192.168.0.100:9000/user/hdfs/week8/homework/model-cbayes cbayes
33 | bayes:
34 | hdfs://192.168.0.100:9000/user/hdfs/week8/homework/user-sport-out  hdfs://192.168.0.100:9000/user/hdfs/week8/homework/result_bayes hdfs://192.168.0.100:9000/user/hdfs/week8/homework/model-bayes bayes
35 | --求最大值——求用户浏览最多的类别，判断用户偏好
36 | bayes:
37 | user_count= load '/user/hdfs/week8/homework/result_bayes/part-r-00000' using PigStorage('|') AS (userid:chararray,category:chararray,times:int);
38 | result = foreach (group user_count by userid) {
39 |        sorted = order user_count by times desc;
40 |        top1= limit sorted 1;
41 |        generate flatten(top1),SUM(user_count.times);
42 | };
43 | DUMP result;
44 | store result into '/user/hdfs/week8/homework/final_result_bayes';
45 | cbayes:
46 | user_count= load '/user/hdfs/week8/homework/result_cbayes/part-r-00000' using PigStorage('|') AS (userid:chararray,category:chararray,times:int);
47 | result = foreach (group user_count by userid) {
48 |        sorted = order user_count by times desc;
49 |        top1= limit sorted 1;
50 |        generate flatten(top1),SUM(user_count.times);
51 | };
52 | DUMP result;
53 | store result into '/user/hdfs/week8/homework/final_result_cbayes';
54 | 


--------------------------------------------------------------------------------
/com.homework/scripts/week8/week8.pig:
--------------------------------------------------------------------------------
 1 | processed= load '/user/hdfs/week8/teacher/in/processed' as (category:chararray,doc:chararray);
 2 | test = sample processed 0.2;
 3 | 
 4 | --测试用
 5 | processed= load '/user/mypig/lefta.txt' as (a1:chararray,a2:chararray,a3:chararray);
 6 | test = sample processed 0.2;
 7 | 
 8 | tfull= JOIN processed BY (a1,a2,a3) LEFT OUTER,test BY (a1,a2,a3);
 9 | t8=  filter tfull BY test::a1 is null;
10 | train= foreach t8 generate processed::a1 as a1,processed::a2 as a2,processed::a3 as a3;
11 | store test into '/user/mypig/test';
12 | store train into '/user/mypig/train';
13 | --正式
14 | processed= load '/user/hdfs/week8/teacher/in/processed' as (category:chararray,doc:chararray);
15 | test = sample processed 0.2;
16 | tfull= JOIN processed BY (category,doc) LEFT OUTER,test BY (category,doc);
17 | t8=  filter tfull BY test::category is null;
18 | train= foreach t8 generate processed::category as category,processed::doc as doc;
19 | store test into '/user/hdfs/week8/teacher/test';
20 | store train into '/user/hdfs/week8/teacher/train';
21 | --统计
22 | test_count = foreach ( group test by category) generate group,COUNT(test.category);
23 | DUMP test_count;
24 | train_count = foreach ( group train by category) generate group,COUNT(train.category);
25 | DUMP train_count;
26 | --mahout-0.6,0.8不行
27 | mahout trainclassifier -i /user/hdfs/week8/teacher/train -o /user/hdfs/week8/model-bayes -type bayes -ng 1 -source hdfs
28 | 
29 | mahout testclassifier -d /user/hdfs/week8/teacher/test -m /user/hdfs/week8/model-bayes -type bayes -ng 1 -source hdfs -method mapreduce 
30 | 
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/com.homework/scripts/week8/week8.txt:
--------------------------------------------------------------------------------
 1 | --1.分词
 2 | hadoop jar /home/hadoop/in/tokenize.jar /user/hdfs/week7/in /user/hdfs/week7/out
 3 | 
 4 | --2.划分样本，80%训练集，20%测试集
 5 | processed= load '/user/hdfs/week8/mine/in/processed' as (category:chararray,doc:chararray);
 6 | test = sample processed 0.2;
 7 | tfull= JOIN processed BY (category,doc) LEFT OUTER,test BY (category,doc);
 8 | t8=  filter tfull BY test::category is null;
 9 | train= foreach t8 generate processed::category as category,processed::doc as doc;
10 | store test into '/user/hdfs/week8/mine/test';
11 | store train into '/user/hdfs/week8/mine/train';
12 |   --查看划分结果
13 | test_count = foreach ( group test by category) generate group,COUNT(test.category);
14 | DUMP test_count;
15 | train_count = foreach ( group train by category) generate group,COUNT(train.category);
16 | DUMP train_count;
17 | 
18 | --3.训练学习集，及测试
19 | --mahout-0.6版,0.8不行
20 | --a.bayes
21 | mahout trainclassifier -i /user/hdfs/week8/mine/train -o /user/hdfs/week8/mine/model-bayes -type bayes -ng 1 -source hdfs
22 | mahout testclassifier -d /user/hdfs/week8/mine/test -m /user/hdfs/week8/mine/model-bayes -type bayes -ng 1 -source hdfs -method mapreduce
23 | --b.cbayes
24 | mahout trainclassifier -i /user/hdfs/week8/mine/train -o /user/hdfs/week8/mine/model-cbayes -type cbayes -ng 1 -source hdfs
25 | mahout testclassifier -d /user/hdfs/week8/mine/test -m /user/hdfs/week8/mine/model-cbayes -type cbayes -ng 1 -source hdfs -method mapreduce
26 | 
27 | --用户数据测试
28 | --分词
29 | hadoop jar /home/hadoop/in/tokenize.jar /user/hdfs/week8/mine/user /user/hdfs/week8/mine/user-out
30 | --运行：
31 | hdfs://192.168.0.100:9000/user/hdfs/week8/mine/user-out hdfs://192.168.0.100:9000/user/hdfs/week8/mine/user-output hdfs://192.168.0.100:9000/user/hdfs/week8/mine/model-cbayes cbayes
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/com.homework/scripts/week9/pagerank.r:
--------------------------------------------------------------------------------
 1 | #pages<-read.csv("page",header=FALSE);
 2 | pages<-read.csv("people.csv",header=FALSE);
 3 | #构造邻接矩阵(方阵):
 4 | mrow<-max(pages)
 5 | A<-matrix(0,nrow=mrow,ncol=mrow);
 6 |     #cols=length(pages[1,]);
 7 | rows=length(pages[,1]);
 8 |  for(i in 1:rows){
 9 |     p1<-pages[i,1];
10 |     p2<-pages[i,2];
11 |     A[p2,p1]<-1;
12 |   }
13 | 
14 | 
15 | #考虑阻尼系统的情况
16 | csum<-colSums(A);
17 | csum[csum==0] <- 1;
18 | Arow=nrow(A);
19 | d<-0.85;
20 | de<-1-d/Arow;
21 | delta <- (1-d)/Arow;
22 | B <- matrix(delta,nrow(A),ncol(A));
23 | for (i in 1:Arow) B[i,] <- B[i,] + d*A[i,]/csum;
24 | # 迭代求解特征向量值
25 | x <- rep(1,Arow);
26 | for (i in 1:100) x <- B %*% x
27 | x/sum(x)
28 | 
29 | 
30 | 
31 | 
32 | 
33 | 
34 | 
35 | 
36 | 
37 | 
38 | #转换为概率矩阵(转移矩阵),不考虑阻尼系统
39 | csum<-colSums(A);
40 | csum[csum==0] <- 1;
41 | Arow=nrow(A);
42 | for(i in 1:Arow){
43 |   A[i,]<-A[i,]/csum;
44 | }
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | #利用幂法求解特征向量，不考虑阻尼系统的情况
60 | x <- rep(1,Arow);
61 | for (i in 1:10) x <- A %*% x
62 | #除以一个常数
63 | x/sum(x);
64 | 
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/com.homework/src/common/com/homework/hdfs/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package com.homework.hdfs;


--------------------------------------------------------------------------------
/com.homework/src/hadoop/machinelearning/clustering/hadoop/machinelearning/clustering/canopy/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package hadoop.machinelearning.clustering.canopy;


--------------------------------------------------------------------------------
/com.homework/src/hadoop/machinelearning/clustering/hadoop/machinelearning/clustering/kmeans/KmeansHadoop.java:
--------------------------------------------------------------------------------
 1 | package hadoop.machinelearning.clustering.kmeans;
 2 | 
 3 | 
 4 | 
 5 | import java.util.Iterator;
 6 | 
 7 | import org.apache.hadoop.fs.Path;
 8 | import org.apache.hadoop.mapred.JobConf;
 9 | import org.apache.mahout.clustering.classify.WeightedVectorWritable;
10 | import org.apache.mahout.clustering.conversion.InputDriver;
11 | import org.apache.mahout.clustering.kmeans.KMeansDriver;
12 | import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
13 | import org.apache.mahout.common.distance.DistanceMeasure;
14 | import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
15 | import org.apache.mahout.math.Vector;
16 | import org.apache.mahout.utils.clustering.ClusterDumper;
17 | 
18 | import com.homework.hdfs.HdfsDAO;
19 | 
20 | /**
21 |  * 素材我已经改成《Mahout in Action》第七章的Hello world例子，运行正常，结果也书一样
22 |  * @author Administrator
23 |  *书中源码地址： https://github.com/tdunning/MiA
24 |  */
25 | public class KmeansHadoop {
26 |     private static final String HDFS = "hdfs://192.168.0.100:9000";
27 | 
28 |     public static void main(String[] args) throws Exception {
29 |         //String localFile = "datafile/randomData.csv";
30 |     	String localFile = "datafile/cluster/simple_k-means.txt";
31 |         String inPath = HDFS + "/user/hdfs/mix_data";
32 |         String seqFile = inPath + "/seqfile";
33 |         String seeds = inPath + "/seeds";
34 |         String outPath = inPath + "/result/";
35 |         String clusteredPoints = outPath + "/clusteredPoints";
36 | 
37 |         JobConf conf = config();
38 |         HdfsDAO hdfs = new HdfsDAO(HDFS, conf);
39 |         hdfs.rmr(inPath);
40 |         hdfs.mkdirs(inPath);
41 |         hdfs.copyFile(localFile, inPath);
42 |         hdfs.ls(inPath);
43 | 
44 |         InputDriver.runJob(new Path(inPath), new Path(seqFile), "org.apache.mahout.math.RandomAccessSparseVector");
45 | 
46 |         //int k = 3;
47 |         int k = 2;
48 |         Path seqFilePath = new Path(seqFile);
49 |         Path clustersSeeds = new Path(seeds);
50 |         DistanceMeasure measure = new EuclideanDistanceMeasure();
51 |         clustersSeeds = RandomSeedGenerator.buildRandom(conf, seqFilePath, clustersSeeds, k, measure);
52 |         KMeansDriver.run(conf, seqFilePath, clustersSeeds, new Path(outPath), measure, 0.01, 10, true, 0.01, false);
53 | 
54 |         Path outGlobPath = new Path(outPath, "clusters-*-final");
55 |         Path clusteredPointsPath = new Path(clusteredPoints);
56 |         System.out.printf("Dumping out clusters from clusters: %s and clusteredPoints: %s\n", outGlobPath, clusteredPointsPath);
57 | 
58 |         ClusterDumper clusterDumper = new ClusterDumper(outGlobPath, clusteredPointsPath);
59 |         clusterDumper.printClusters(null);
60 |     }
61 | 
62 |     public static JobConf config() {
63 |         JobConf conf = new JobConf(KmeansHadoop.class);
64 |         conf.setJobName("ItemCFHadoop");
65 |         conf.addResource("classpath:/hadoop/core-site.xml");
66 |         conf.addResource("classpath:/hadoop/hdfs-site.xml");
67 |         conf.addResource("classpath:/hadoop/mapred-site.xml");
68 |         return conf;
69 |     }
70 | 
71 |     public static void displayCluster(ClusterDumper clusterDumper) {
72 |         Iterator<Integer> keys = clusterDumper.getClusterIdToPoints().keySet().iterator();
73 |         while (keys.hasNext()) {
74 |             Integer center = keys.next();
75 |             System.out.println("Center:" + center);
76 |             for (WeightedVectorWritable point : clusterDumper.getClusterIdToPoints().get(center)) {
77 |                 Vector v = point.getVector();
78 |                 System.out.println(v.get(0) + "" + v.get(1));
79 |             }
80 |         }
81 |     }
82 | }
83 | 


--------------------------------------------------------------------------------
/com.homework/src/hadoop/machinelearning/clustering/hadoop/machinelearning/clustering/kmeans/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package hadoop.machinelearning.clustering.kmeans;


--------------------------------------------------------------------------------
/com.homework/src/main/java/com/homework/App.java:
--------------------------------------------------------------------------------
 1 | package com.homework;
 2 | 
 3 | /**
 4 |  * Hello world!
 5 |  *
 6 |  */
 7 | public class App 
 8 | {
 9 |     public static void main( String[] args )
10 |     {
11 |         System.out.println( "Hello World!" );
12 |     }
13 | }
14 | 


--------------------------------------------------------------------------------
/com.homework/src/mommon/com/homework/mommon/ComTest.java:
--------------------------------------------------------------------------------
 1 | package com.homework.mommon;
 2 | 
 3 | public class ComTest {
 4 | 
 5 | 	public static void main(String[] args) {
 6 | 		// TODO Auto-generated method stub
 7 |         String str="sss";
 8 |         String str2="dd";
 9 | 	}
10 | 
11 | }
12 | 


--------------------------------------------------------------------------------
/com.homework/src/mommon/com/homework/mommon/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Jamas
6 |  *
7 |  */
8 | package com.homework.mommon;


--------------------------------------------------------------------------------
/com.homework/src/mommon/mytest/MenuTree.java:
--------------------------------------------------------------------------------
 1 | package mytest;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.Iterator;
 5 | import java.util.List;
 6 | //递归一颗树
 7 | public class MenuTree {
 8 | 
 9 | 
10 | 	public static void mytree(List<Node> nlist,Node node){
11 | 		System.out.print(node.getId()+node.getNodeName());
12 | 		Node subnode=null;
13 | 		Long id=node.getId();
14 | 		Iterator<Node> iter=nlist.iterator();
15 | 		boolean isexit=false;
16 | 		while(iter.hasNext()){
17 | 			Node nod=iter.next();
18 | 			if(nod.getParentId()==id){
19 | 				isexit=true;
20 | 				subnode=nod;
21 | 				mytree(nlist,subnode);
22 | 				
23 | 			}
24 | 		}
25 | 		if(!isexit)return;
26 | 		
27 | 	}
28 | 	
29 | 	
30 | 	public static void main(String[] args) {
31 |             
32 | 		long start = System.currentTimeMillis();
33 | 		List<Node> nodeList = new ArrayList<Node>();
34 | 		Node node1 = new Node(1l, "蔬菜", 0l);
35 | 		Node node2 = new Node(2l, "水产", 0l);
36 | 		Node node3 = new Node(3l, "畜牧", 0l);
37 | 		Node node4 = new Node(4l, "瓜类", 1l);
38 | 		Node node5 = new Node(5l, "叶类", 1l);
39 | 		Node node6 = new Node(6l, "丝瓜", 4l);
40 | 		Node node7 = new Node(7l, "黄瓜", 4l);
41 | 		Node node8 = new Node(8l, "白菜", 1l);
42 | 		Node node9 = new Node(9l, "虾", 2l);
43 | 		Node node10 = new Node(10l, "鱼", 2l);
44 | 		Node node11 = new Node(11l, "牛", 3l);
45 | 		Node node0=new Node(0l,"市场种类",-1l);
46 | 		
47 | 		nodeList.add(node0);
48 | 		nodeList.add(node1);
49 | 		nodeList.add(node2);
50 | 		nodeList.add(node3);
51 | 		nodeList.add(node4);
52 | 		nodeList.add(node5);
53 | 		nodeList.add(node6);
54 | 		nodeList.add(node7);
55 | 		nodeList.add(node8);
56 | 		nodeList.add(node9);
57 | 		nodeList.add(node10);
58 | 		nodeList.add(node11);
59 | 		
60 | 		mytree(nodeList,node0);
61 | 		//NodeUtil mt = new NodeUtil();
62 | 		//System.out.println(mt.getChildNodes(nodeList, 1l));
63 | 		long end = System.currentTimeMillis();
64 | 		System.out.println("用时:" + (end - start) + "ms");
65 | 	}
66 | 
67 | }
68 | 


--------------------------------------------------------------------------------
/com.homework/src/mommon/mytest/Node.java:
--------------------------------------------------------------------------------
 1 | package mytest;
 2 | 
 3 | /**
 4 |  * 无限级节点模型
 5 |  */
 6 | public class Node {
 7 | 	/**
 8 | 	 * 节点id
 9 | 	 */
10 | 	private Long id;
11 | 
12 | 	/**
13 | 	 * 节点名称
14 | 	 */
15 | 	private String nodeName;
16 | 
17 | 	/**
18 | 	 * 父节点id
19 | 	 */
20 | 	private Long parentId;
21 | 
22 | 	public Node() {
23 | 	}
24 | 
25 | 	Node(Long id, Long parentId) {
26 | 		this.id = id;
27 | 		this.parentId = parentId;
28 | 	}
29 | 
30 | 	Node(Long id, String nodeName, Long parentId) {
31 | 		this.id = id;
32 | 		this.nodeName = nodeName;
33 | 		this.parentId = parentId;
34 | 	}
35 | 
36 | 	public Long getId() {
37 | 		return id;
38 | 	}
39 | 
40 | 	public void setId(Long id) {
41 | 		this.id = id;
42 | 	}
43 | 
44 | 	public Long getParentId() {
45 | 		return parentId;
46 | 	}
47 | 
48 | 	public void setParentId(Long parentId) {
49 | 		this.parentId = parentId;
50 | 	}
51 | 
52 | 	public String getNodeName() {
53 | 		return nodeName;
54 | 	}
55 | 
56 | 	public void setNodeName(String nodeName) {
57 | 		this.nodeName = nodeName;
58 | 	}
59 | 
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/com.homework/src/mommon/mytest/Recursive.java:
--------------------------------------------------------------------------------
 1 | package mytest;
 2 | 
 3 | public class Recursive {
 4 | 
 5 | 	public static void foo(int num){
 6 | 		num=num-1;
 7 | 		if(num==0)return;
 8 | 		
 9 | 		else
10 | 		{
11 | 			System.out.println(num);
12 | 			foo(num);
13 | 			
14 | 		}
15 | 		
16 | 	}
17 | 	public static void main(String[] args) {
18 | 		// TODO Auto-generated method stub
19 | 		 foo(5);
20 | 	}
21 | 
22 | }
23 | 


--------------------------------------------------------------------------------
/com.homework/src/mommon/mytest/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package mytest;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/apriori/ItemMap.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.association.apriori;
 2 | 
 3 | import java.util.HashMap;
 4 | import java.util.Map;
 5 | 
 6 | public class ItemMap {
 7 | 
 8 | 	public String key;
 9 | 	public Integer value=0;
10 | 	
11 | 	public Map<String,Integer> map;
12 | 	
13 | 	public String getKey() {
14 | 		return key;
15 | 	}
16 | 	public void setKey(String key) {
17 | 		this.key = key;
18 | 	}
19 | 	public Integer getValue() {
20 | 		return value;
21 | 	}
22 | 	public void setValue(Integer value) {
23 | 		this.value = value;
24 | 	}
25 | 	public Map<String, Integer> getMap() {
26 | 		if(map==null){
27 | 			map=new HashMap<String,Integer>();
28 | 		}
29 | 		return map;
30 | 	}
31 | 	public void setMap(Map<String, Integer> map) {
32 | 		this.map = map;
33 | 	}
34 | 	
35 | }
36 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/apriori/Subset.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.association.apriori;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.List;
 5 | /**
 6 |  * 求集合的子集，不包含自己和空集
 7 |  * @author Administrator
 8 |  * 算法：先取出第一个，取出一下个的时候与前面的所有项逐个搭配，所以两次FOR就可以了
 9 |  * 1  //第一次FOR,i=0
10 |  * 12，2  //第二次FOR,i=1
11 |  * 123，23，13，3  //第三次FOR,i=2
12 |  * 14,124,24,1234,234,134,34//第四次FOR,i=3
13 |  * 最后删除本身，为了Apriori算法而增加这一步
14 |  */ 
15 | //本类为测试使用，不在MyApriori里面
16 | public class Subset {
17 | 
18 | 	public static List<String> lis=new ArrayList<String>();
19 | 	public static void main(String[] args) {
20 | 		
21 | 		//subset();
22 | 		// TODO Auto-generated method stub
23 | 		String[] str =new String[] { "1", "2", "3", "4"};
24 | 		StringBuilder sb=new StringBuilder();
25 | 		List<String> li=new ArrayList<String>();
26 | 		for(int i=0;i<str.length;i++){
27 | 			int size=li.size();
28 | 			//取出一下个的时候与前面的所有项逐个搭配
29 | 			for(int j=0;j<size;j++){
30 | 				li.add(li.get(j)+str[i] );
31 | 			}
32 | 			li.add(str[i]);
33 | 			sb.append(str[i]);
34 | 		}
35 | 		//删除本身
36 | 		int n=li.indexOf(sb.toString());
37 | 		li.remove(n);
38 | 		String strr="dd";
39 | 		
40 | 	}
41 | 	
42 | 	public static void subset(){
43 | 		String[] str =new String[] { "I1", "I2", "I3", "I4"};
44 | 		StringBuilder sb=new StringBuilder();
45 | 		List<String> li=new ArrayList<String>();
46 | 		for(int i=0;i<str.length;i++){
47 | 			int size=li.size();
48 | 			//取出一下个的时候与前面的所有项逐个搭配
49 | 			for(int j=0;j<size;j++){
50 | 				li.add(li.get(j)+","+str[i] );
51 | 			}  
52 | 			li.add(str[i]);
53 | 			sb.append(str[i]);
54 | 		}
55 | 		//删除本身
56 | 		int n=li.indexOf(sb.toString()+",");
57 | 		li.remove(n);
58 | 		String strr="dd";
59 | 	}
60 | 
61 | }
62 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/apriori/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.association.apriori;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/common/Definition.java:
--------------------------------------------------------------------------------
1 | package sequence.machinelearning.association.common;
2 | 
3 | public class Definition {
4 | 
5 | 	
6 | }
7 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/common/Mytest.java:
--------------------------------------------------------------------------------
1 | package sequence.machinelearning.association.common;
2 | 
3 | public class Mytest {
4 | 
5 | }
6 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/common/ReadData.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.association.common;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.FileInputStream;
 5 | import java.io.IOException;
 6 | import java.io.InputStreamReader;
 7 | import java.util.HashMap;
 8 | import java.util.Map;
 9 | import java.util.TreeMap;
10 | 
11 | public class ReadData {
12 | 
13 | 	
14 | 	public static TreeMap<String,String> dataMap=new TreeMap<String,String>();
15 | 	public static final void readF1() throws IOException {      
16 | 		
17 | 		//String filePath="scripts/clustering/canopy/canopy.dat";
18 | 		String filePath="datafile/association/items";
19 | 		BufferedReader br = new BufferedReader(new InputStreamReader(
20 |         new FileInputStream(filePath)));
21 |         for (String line = br.readLine(); line != null; line = br.readLine()) {
22 |             if(line.length()==0||"".equals(line))continue;
23 |         	String[] str=line.split("\t");               
24 |         	dataMap.put(str[0], str[1].trim());
25 |             //System.out.println(line);               
26 |         }
27 |         br.close();
28 |         
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/common/SortTest.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.association.common;
 2 | 
 3 | import java.util.Comparator;             
 4 | import java.util.List;             
 5 | import java.util.ArrayList;             
 6 | import java.util.Collections;             
 7 |              
 8 | class User {             
 9 |  String name;             
10 |  String age;             
11 |              
12 |  public User(String name,String age){             
13 |   this.name=name;             
14 |   this.age=age;             
15 |  }             
16 |  public String getAge() {             
17 |   return age;             
18 |  }             
19 |  public void setAge(String age) {             
20 |   this.age = age;             
21 |  }             
22 |  public String getName() {             
23 |   return name;             
24 |  }             
25 |  public void setName(String name) {             
26 |   this.name = name;             
27 |  }              
28 | }             
29 |              
30 | class ComparatorUser implements Comparator{             
31 |              
32 |  public int compare(Object arg0, Object arg1) {             
33 |   User user0=(User)arg0;             
34 |   User user1=(User)arg1;             
35 |   //首先比较年龄，如果年龄相同，则比较名字             
36 |   int flag=user0.getAge().compareTo(user1.getAge());             
37 |   if(flag==0){             
38 |    return user0.getName().compareTo(user1.getName());             
39 |   }else{             
40 |    return flag;             
41 |   }               
42 |  }             
43 |              
44 | }             
45 |              
46 | public class SortTest {             
47 |              
48 |              
49 |  public static void main(String[] args){             
50 |   List userlist=new ArrayList();             
51 |   userlist.add(new User("dd","4"));             
52 |   userlist.add(new User("aa","1"));             
53 |   userlist.add(new User("ee","5"));             
54 |   userlist.add(new User("bb","2"));               
55 |   userlist.add(new User("ff","5"));             
56 |   userlist.add(new User("cc","3"));             
57 |   userlist.add(new User("gg","6"));             
58 |                
59 |   ComparatorUser comparator=new ComparatorUser();             
60 |   Collections.sort(userlist, comparator);             
61 |                 
62 |   for (int i=0;i<userlist.size();i++){             
63 |    User user_temp=(User)userlist.get(i);             
64 |       System.out.println(user_temp.getAge()+","+user_temp.getName());              
65 |   }             
66 |                
67 |  }             
68 | }             
69 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/common/Transaction.java:
--------------------------------------------------------------------------------
  1 | package sequence.machinelearning.association.common;
  2 | 
  3 | import java.io.IOException;
  4 | import java.util.ArrayList;
  5 | import java.util.Iterator;
  6 | import java.util.LinkedList;
  7 | import java.util.List;
  8 | import java.util.Map;
  9 | import java.util.TreeMap;
 10 | import java.util.Map.Entry;
 11 | 
 12 | import sequence.machinelearning.association.apriori.ItemMap;
 13 | 
 14 | public class Transaction {
 15 | 
 16 | 	public static final int  support = 2; // 设定最小支持频次为2 
 17 | 	public static  int  itemnum = 0; // 设定购物篮中最大项数
 18 | 	//放到Map中排序
 19 | 	public static TreeMap<String,Integer> tmap=new TreeMap<String,Integer>();
 20 | 	
 21 | 	/**
 22 | 	 * 扫描事务集以确定频繁1项集(找出C1)
 23 | 	 */
 24 | 	public static List<ItemMap> findFrequentOneItemSets(Map<String,String> map){
 25 | 		TreeMap<String,Integer> treemap=new TreeMap<String,Integer>();
 26 | 		Iterator<Entry<String,String>> iter=map.entrySet().iterator();
 27 | 		Entry<String,String> entry;
 28 | 		while(iter.hasNext()){
 29 | 			entry=iter.next();
 30 | 			String str=entry.getValue();
 31 | 			if(str.length()<1)continue;
 32 | 			String[] items=str.split(",");
 33 | 			//找出购物栏最大的项,为循环连接做准备
 34 | 			if(items.length>itemnum)itemnum=items.length;
 35 | 			for(int i=0;i<items.length;i++){
 36 | 				if(treemap.containsKey(items[i].trim())){
 37 | 					Integer count=treemap.get(items[i].trim())+1;
 38 | 					treemap.put(items[i].trim(), count);
 39 | 				}else{
 40 | 					treemap.put(items[i].trim(), 1);
 41 | 				}
 42 | 			}
 43 | 		}
 44 | 		return DeleteItem(treemap);
 45 | 	}
 46 | 	
 47 | 	//放到Map中排序
 48 | 	public static void toMap() throws IOException{
 49 | 	   //ReadData.readF1(); 
 50 | 	   List<ItemMap> lif1=Transaction.findFrequentOneItemSets(ReadData.dataMap);
 51 | 	   for(int i=0;i<lif1.size();i++){
 52 | 		   ItemMap item=lif1.get(i);
 53 | 		   tmap.put(item.getKey(), item.getValue());
 54 | 	   }
 55 | 	}
 56 | 	public static  LinkedList<String> itemsort(String[] items){
 57 | 		LinkedList<String> linst=new LinkedList<String>();
 58 | 		//选择法排序
 59 | 		int len=items.length;
 60 | 		for(int i=0;i<len;i++){
 61 | 			
 62 | 			for(int j=i+1;j<len;j++){
 63 | 				if(!tmap.containsKey(items[i]))continue;
 64 | 				if(!tmap.containsKey(items[j]))continue;
 65 | 				int num=tmap.get(items[i]);
 66 | 				int nextnum=tmap.get(items[j]);
 67 | 				if(num<nextnum){
 68 | 					String tmp=items[i];
 69 | 					items[i]=items[j];
 70 | 					items[j]=tmp;
 71 | 				}
 72 | 			}
 73 | 			linst.add(items[i]);
 74 | 		}
 75 | 		return linst;
 76 | 	}
 77 | 	/**
 78 | 	 * 剪枝:产生候选项，删除最小事务支持的选项
 79 | 	 * */
 80 | 	public static List<ItemMap> DeleteItem(TreeMap<String,Integer> map){
 81 | 		List<ItemMap> listmap=new ArrayList<ItemMap>();
 82 | 		Iterator<Entry<String,Integer>> iter=map.entrySet().iterator();
 83 | 		Entry<String,Integer> entry;
 84 | 		while(iter.hasNext()){
 85 | 			entry=iter.next();
 86 | 			if(entry.getValue()>=support){
 87 | 				ItemMap item=new ItemMap();
 88 | 				item.setKey(entry.getKey());
 89 | 				item.setValue(entry.getValue());
 90 | 				if(listmap.size()==0)listmap.add(item);
 91 | 				else{
 92 | 					
 93 | 					ItemMap tail=new ItemMap();
 94 | 					int size=listmap.size();
 95 | 					tail=listmap.get(size-1);
 96 | 					if(item.getValue()>tail.getValue()){
 97 | 					    listmap.remove(size-1);
 98 | 					    listmap.add(item);
 99 | 					    listmap.add(tail);
100 | 					}else{
101 | 						listmap.add(item);
102 | 					}
103 | 					
104 | 				  }
105 | 				
106 | 			}
107 | 		}
108 | 		return listmap;
109 | 	}
110 | }
111 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/common/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.association.common;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/fpgrowth/TreeNode2.java:
--------------------------------------------------------------------------------
  1 | package sequence.machinelearning.association.fpgrowth;
  2 | 
  3 | import java.util.ArrayList;
  4 | import java.util.List;
  5 | 
  6 | 
  7 | 
  8 | public class TreeNode2 implements Comparable<TreeNode2>{
  9 | 
 10 |     private String name; // 节点名称
 11 |     private Integer count; // 计数
 12 |     private TreeNode2 parent; // 父节点
 13 |     private List<TreeNode2> children; // 子节点
 14 |     private TreeNode2 nextHomonym; // 下一个同名节点
 15 |   
 16 |     public TreeNode2() {
 17 |   
 18 |     }
 19 | 
 20 | 	public String getName() {
 21 | 		return name;
 22 | 	}
 23 | 
 24 | 	public void setName(String name) {
 25 | 		this.name = name;
 26 | 	}
 27 | 
 28 | 	public Integer getCount() {
 29 | 		return count;
 30 | 	}
 31 | 
 32 | 	public void setCount(Integer count) {
 33 | 		this.count = count;
 34 | 	}
 35 | 	public void Sum(Integer count) {
 36 | 		this.count =this.count+count;
 37 | 	}
 38 | 	public TreeNode2 getParent() {
 39 | 		return parent;
 40 | 	}
 41 | 
 42 | 	public void setParent(TreeNode2 parent) {
 43 | 		this.parent = parent;
 44 | 	}
 45 | 
 46 | 	public List<TreeNode2> getChildren() {
 47 | 		return children;
 48 | 	}
 49 | 
 50 | 	public void setChildren(List<TreeNode2> children) {
 51 | 		this.children = children;
 52 | 	}
 53 | 
 54 | 	public TreeNode2 getNextHomonym() {
 55 | 		return nextHomonym;
 56 | 	}
 57 | 
 58 | 	public void setNextHomonym(TreeNode2 nextHomonym) {
 59 | 		this.nextHomonym = nextHomonym;
 60 | 	}
 61 |     /**
 62 |      * 添加一个节点
 63 |      * @param child
 64 |      */
 65 |     public void addChild(TreeNode2 child) {
 66 |         if (this.getChildren() == null) {
 67 |             List<TreeNode2> list = new ArrayList<TreeNode2>();
 68 |             list.add(child);
 69 |             this.setChildren(list);
 70 |         } else {
 71 |             this.getChildren().add(child);
 72 |         }
 73 |     }
 74 |     /**
 75 |     *  是否存在着该节点,存在返回该节点，不存在返回空
 76 |     * @param name
 77 |     * @return
 78 |     */
 79 |     public TreeNode2 findChild(String name) {
 80 |         List<TreeNode2> children = this.getChildren();
 81 |         if (children != null) {
 82 |             for (TreeNode2 child : children) {
 83 |                 if (child.getName().equals(name)) {
 84 |                     return child;
 85 |                 }
 86 |             }
 87 |         }
 88 |         return null;
 89 |     }
 90 | 
 91 | 
 92 |     @Override
 93 |     public int compareTo(TreeNode2 arg0) {
 94 |         // TODO Auto-generated method stub
 95 |         int count0 = arg0.getCount();
 96 |         // 跟默认的比较大小相反，导致调用Arrays.sort()时是按降序排列
 97 |         return count0 - this.count;
 98 |     }
 99 | }
100 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/fpgrowth/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.association.fpgrowth;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/fpgtest/TreeNode.java:
--------------------------------------------------------------------------------
  1 | package sequence.machinelearning.association.fpgtest;
  2 | 
  3 | import java.util.ArrayList;
  4 | import java.util.List;
  5 |   
  6 | public class TreeNode implements Comparable<TreeNode> {
  7 |   
  8 |     private String name; // 节点名称
  9 |     private int count; // 计数
 10 |     private TreeNode parent; // 父节点
 11 |     private List<TreeNode> children; // 子节点
 12 |     private TreeNode nextHomonym; // 下一个同名节点
 13 |   
 14 |     public TreeNode() {
 15 |   
 16 |     }
 17 |   
 18 |     public TreeNode(String name) {
 19 |         this.name = name;
 20 |     }
 21 |   
 22 |     public String getName() {
 23 |         return name;
 24 |     }
 25 |   
 26 |     public void setName(String name) {
 27 |         this.name = name;
 28 |     }
 29 |   
 30 |     public int getCount() {
 31 |         return count;
 32 |     }
 33 |   
 34 |     public void setCount(int count) {
 35 |         this.count = count;
 36 |     }
 37 |   
 38 |     public TreeNode getParent() {
 39 |         return parent;
 40 |     }
 41 |   
 42 |     public void setParent(TreeNode parent) {
 43 |         this.parent = parent;
 44 |     }
 45 |   
 46 |     public List<TreeNode> getChildren() {
 47 |         return children;
 48 |     }
 49 |   
 50 |     public void addChild(TreeNode child) {
 51 |         if (this.getChildren() == null) {
 52 |             List<TreeNode> list = new ArrayList<TreeNode>();
 53 |             list.add(child);
 54 |             this.setChildren(list);
 55 |         } else {
 56 |             this.getChildren().add(child);
 57 |         }
 58 |     }
 59 |   
 60 |     public TreeNode findChild(String name) {
 61 |         List<TreeNode> children = this.getChildren();
 62 |         if (children != null) {
 63 |             for (TreeNode child : children) {
 64 |                 if (child.getName().equals(name)) {
 65 |                     return child;
 66 |                 }
 67 |             }
 68 |         }
 69 |         return null;
 70 |     }
 71 |   
 72 |     public void setChildren(List<TreeNode> children) {
 73 |         this.children = children;
 74 |     }
 75 |   
 76 |     public void printChildrenName() {
 77 |         List<TreeNode> children = this.getChildren();
 78 |         if (children != null) {
 79 |             for (TreeNode child : children) {
 80 |                 System.out.print(child.getName() + " ");
 81 |             }
 82 |         } else {
 83 |             System.out.print("null");
 84 |         }
 85 |     }
 86 |   
 87 |     public TreeNode getNextHomonym() {
 88 |         return nextHomonym;
 89 |     }
 90 |   
 91 |     public void setNextHomonym(TreeNode nextHomonym) {
 92 |         this.nextHomonym = nextHomonym;
 93 |     }
 94 |   
 95 |     public void countIncrement(int n) {
 96 |         this.count += n;
 97 |     }
 98 |   
 99 |     @Override
100 |     public int compareTo(TreeNode arg0) {
101 |         // TODO Auto-generated method stub
102 |         int count0 = arg0.getCount();
103 |         // 跟默认的比较大小相反，导致调用Arrays.sort()时是按降序排列
104 |         return count0 - this.count;
105 |     }
106 | }
107 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/fpgtest/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.association.fpgtest;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/association/sequence/machinelearning/association/otherdemo/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.association.otherdemo;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/clustering/sequence/machinelearning/clustering/canopy/MyCanopy.java:
--------------------------------------------------------------------------------
  1 | package sequence.machinelearning.clustering.canopy;
  2 | 
  3 | import java.io.BufferedReader;
  4 | import java.io.FileInputStream;
  5 | import java.io.FileReader;
  6 | import java.io.IOException;
  7 | import java.io.InputStreamReader;
  8 | import java.util.ArrayList;
  9 | import java.util.List;
 10 | import java.util.Vector;
 11 | 
 12 | public class MyCanopy {
 13 | 
 14 | 	//x,y之间当且仅当有一个空格，要严格控制，为了与MAHOUT中的输入格式一致，所以这里也采用空格作为分隔。
 15 | 	static Vector<Point>  li=new Vector<Point>();
 16 | 	//static List<Point>  li=new ArrayList<Point>();
 17 | 	static List<Vector<Point>> list=new ArrayList<Vector<Point>>();
 18 | 	private final static Double t1=8.0;
 19 | 	private final static Double t2=4.0;
 20 | 	//简单地采用曼哈顿距离|x1 – x2| + |y1 – y2|
 21 | 	
 22 | 	public static final void readF1() throws IOException {      
 23 | 		
 24 | 		//String filePath="scripts/clustering/canopy/canopy.dat";
 25 | 		String filePath="datafile/cluster/simple_k-means.txt";
 26 | 		BufferedReader br = new BufferedReader(new InputStreamReader(
 27 |         new FileInputStream(filePath)));
 28 |         for (String line = br.readLine(); line != null; line = br.readLine()) {
 29 |             if(line.length()==0||"".equals(line))continue;
 30 |         	String[] str=line.split(" ");               
 31 |             Point p0=new Point();
 32 |     		p0.setX(Double.valueOf(str[0]));
 33 |     		p0.setY(Double.valueOf(str[1]));
 34 |     		li.add(p0);
 35 |             //System.out.println(line);               
 36 |         }
 37 |         br.close();
 38 |     }
 39 | 	//简单地采用曼哈顿距离|x1 – x2| + |y1 – y2|
 40 | 	public static  Double DistanceMeasure(Point p1,Point p2){
 41 | 		return Math.abs(p2.getX()-p1.getX()) +Math.abs(p2.getY()-p1.getY());
 42 | 	}
 43 | 	public static void clustering(){
 44 |           
 45 | 		//初始化一个canopy   
 46 | 		   Point p0=new Point();
 47 | 		   p0=li.get(0);
 48 | 		   Vector<Point>  v1=new Vector<Point>();
 49 | 		   v1.add(p0);
 50 | 		   list.add(v1);
 51 | 		   li.remove(0); 
 52 | 		   System.out.println("中心点为:"+p0.getX()+","+p0.getY());
 53 | 		while(0<li.size()){
 54 | 			Point p1=li.get(0);
 55 | 			//如果属于已有的聚类，记为true,否则false
 56 | 			boolean inside=false;
 57 | 			for(int i=0;i<list.size();i++){
 58 | 				Vector<Point>  v=list.get(i);
 59 | 				Point p2=v.get(0);
 60 | 				double dist =DistanceMeasure(p1,p2);
 61 |                 //如果小于t2，属于当前的聚类,已经够接近了，不需要再聚类了，所以删除
 62 | 				if(dist<t2){
 63 | 					inside=true;
 64 | 					list.get(i).add(p1);
 65 | 					li.remove(0);
 66 | 					System.out.println("C"+i+":"+p1.getX()+","+p1.getY()+"dist<t2");
 67 | 					
 68 | 				}
 69 | 				//如果t2<=dist<t1，属于当前的聚类,但还不确定他是否比其它的聚类更加接近，所以不删除，等待下一轮的观察。
 70 |                 if(t2<=dist&&dist<t1){
 71 |                 	//参加本类聚类过的点不再参与聚类
 72 |                 	if(p1.getSign()==i){
 73 | 						continue;
 74 | 					}
 75 |                 	list.get(i).add(p1);
 76 | 					inside=true;
 77 | 					li.remove(0);
 78 | 					li.add(p1);
 79 | 					
 80 | 					//是否曾经参与过本次聚类，记上聚类的索引号，下次再来的时候不伺候
 81 | 					p1.setSign(i);
 82 | 					
 83 | 					System.out.println("C"+i+":"+p1.getX()+","+p1.getY()+"t2<=dist<t1");
 84 | 				}
 85 |              }
 86 | 			//如果不属于现有的任何一个聚类，则新建一个聚类,然后删除该点
 87 | 			if(!inside){
 88 | 				   Vector<Point>  vec=new Vector<Point>();
 89 | 				   vec.add(p1);
 90 | 				   li.remove(0);
 91 | 				   list.add(vec);
 92 | 				   	
 93 | 			}
 94 | 			//与各个已经形成的聚类比较距离，比较结束后将其删除，以结束循环
 95 | 			if(li.get(0).getSign()!=-1){
 96 | 				li.remove(0);
 97 | 			}
 98 |          }
 99 | 		String ss="ddd";
100 | 	}
101 | 	
102 | 	
103 | 
104 | 	
105 | 	
106 | 	
107 | 	public static void main(String[] args) throws IOException {
108 | 		// TODO Auto-generated method stub
109 | 		readF1();
110 | 		
111 | 		clustering();
112 | 		String ss="ddd";
113 | 	}
114 | 
115 | }
116 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/clustering/sequence/machinelearning/clustering/canopy/Point.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.clustering.canopy;
 2 | 
 3 | public class Point {
 4 | 
 5 | 	private Double x;
 6 | 	private Double y;
 7 | 	private Integer sign=-1;
 8 | 	public Double getX() {
 9 | 		return x;
10 | 	}
11 | 	public void setX(Double x) {
12 | 		this.x = x;
13 | 	}
14 | 	public Double getY() {
15 | 		return y;
16 | 	}
17 | 	public void setY(Double y) {
18 | 		this.y = y;
19 | 	}
20 | 	public Integer getSign() {
21 | 		return sign;
22 | 	}
23 | 	public void setSign(Integer sign) {
24 | 		this.sign = sign;
25 | 	}
26 | 	
27 | }
28 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/clustering/sequence/machinelearning/clustering/canopy/UserPoint.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.clustering.canopy;
 2 | 
 3 | public class UserPoint {
 4 | 
 5 | 	private Double x;
 6 | 	private Double y;
 7 | 	private Integer sign=-1;
 8 | 	private String userid;
 9 | 	
10 | 	public Double getX() {
11 | 		return x;
12 | 	}
13 | 	public void setX(Double x) {
14 | 		this.x = x;
15 | 	}
16 | 	public Double getY() {
17 | 		return y;
18 | 	}
19 | 	public void setY(Double y) {
20 | 		this.y = y;
21 | 	}
22 | 	public Integer getSign() {
23 | 		return sign;
24 | 	}
25 | 	public void setSign(Integer sign) {
26 | 		this.sign = sign;
27 | 	}
28 | 	public String getUserid() {
29 | 		return userid;
30 | 	}
31 | 	public void setUserid(String userid) {
32 | 		this.userid = userid;
33 | 	}
34 | 	
35 | }
36 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/clustering/sequence/machinelearning/clustering/canopy/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.clustering.canopy;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/clustering/sequence/machinelearning/clustering/kmeans/MyKmeans.java:
--------------------------------------------------------------------------------
  1 | package sequence.machinelearning.clustering.kmeans;
  2 | 
  3 | import java.io.BufferedReader;
  4 | import java.io.FileInputStream;
  5 | import java.io.IOException;
  6 | import java.io.InputStreamReader;
  7 | import java.util.ArrayList;
  8 | import java.util.List;
  9 | import java.util.Vector;
 10 | 
 11 | import sequence.machinelearning.clustering.canopy.Point;
 12 | 
 13 | 
 14 | 
 15 | public class MyKmeans {
 16 | 
 17 | 	static Vector<Point>  li=new Vector<Point>();
 18 | 	//static List<Point>  li=new ArrayList<Point>();
 19 | 	static List<Vector<Point>> list=new ArrayList<Vector<Point>>(); //每次迭代保存结果，一个vector代表一个簇
 20 | 	private final static Integer K=2; //选K=2，也就是估算有两个簇。
 21 | 	private final static Double converge=0.001; //当距离小于某个值的时候，就认为聚类已经聚类了，不需要再迭代，这里的值选0.001	
 22 | 	
 23 | 	//读取数据
 24 | 	public static final void readF1() throws IOException {      
 25 | 		String filePath="datafile/cluster/simple_k-means.txt";
 26 | 		BufferedReader br = new BufferedReader(new InputStreamReader(
 27 |         new FileInputStream(filePath)));
 28 |         for (String line = br.readLine(); line != null; line = br.readLine()) {
 29 |             if(line.length()==0||"".equals(line))continue;
 30 |         	String[] str=line.split(" ");               
 31 |             Point p0=new Point();
 32 |     		p0.setX(Double.valueOf(str[0]));
 33 |     		p0.setY(Double.valueOf(str[1]));
 34 |     		li.add(p0);
 35 |             //System.out.println(line);               
 36 |         }
 37 |         br.close();
 38 |     }
 39 | 	  //math.sqrt(double n)
 40 |     //扩展下，如果要给m开n次方就用java.lang.StrictMath.pow(m,1.0/n);
 41 | 	//采用欧氏距离
 42 | 	public static  Double DistanceMeasure(Point p1,Point p2){
 43 | 		
 44 | 		Double tmp=StrictMath.pow(p2.getX()-p1.getX(), 2)+StrictMath.pow(p2.getY()-p1.getY(), 2);
 45 | 		return Math.sqrt(tmp);
 46 | 	}
 47 | 	
 48 | 	//计算新的簇心
 49 | 	public static Double CalCentroid(){
 50 | 		System.out.println("------------------------------------------------");
 51 | 		Double movedist=Double.MAX_VALUE;
 52 | 		for(int i=0;i<list.size();i++){
 53 | 			Vector<Point> subli=list.get(i);
 54 | 			Point po=new Point();
 55 | 			Double sumX=0.0;
 56 | 			Double sumY=0.0;
 57 | 			Double Clusterlen=Double.valueOf(subli.size());
 58 | 			for(int j=0;j<Clusterlen;j++){
 59 | 				Point nextp=subli.get(j);
 60 | 				sumX=sumX+nextp.getX();
 61 | 				sumY=sumY+nextp.getY();
 62 | 			}
 63 | 			po.setX(sumX/Clusterlen);
 64 | 			po.setY(sumY/Clusterlen);
 65 | 			//新的点与旧点之间的距离
 66 | 			Double dist=DistanceMeasure(subli.get(0),po);
 67 | 			//在多个簇心移动的过程中，返回移动距离最小的值
 68 | 			if(dist<movedist)movedist=dist;
 69 | 			list.get(i).clear();
 70 | 			list.get(i).add(po);
 71 | 			System.out.println("C"+i+"的簇心为："+po.getX()+","+po.getY());
 72 | 		}
 73 | 		String test="ll";
 74 | 		return movedist;
 75 | 	}
 76 | 	//本次的簇心
 77 | 	//下一次移动的簇心
 78 | 	
 79 | 	private static Double move=Double.MAX_VALUE;//移动距离
 80 | 	//不断地迭代，直到收敛
 81 | 	public static void RecursionKluster(){
 82 | 		for(int times=2;move>converge;times++){
 83 | 			System.out.println("第"+times+"次迭代");
 84 | 			//默认每一个list里的Vector第0个元素是质心
 85 | 			for(int i=0;i<li.size();i++){
 86 | 				Point p=new Point();
 87 | 				 p=li.get(i);
 88 | 				int index = -1;
 89 | 				
 90 | 	            double neardist = Double.MAX_VALUE;
 91 | 				for(int k=0;k<K;k++){
 92 | 					Point centre=list.get(k).get(0);
 93 | 					double currentdist=DistanceMeasure(p,centre);
 94 | 					if(currentdist<neardist){
 95 | 						neardist=currentdist;
 96 | 						index=k;
 97 | 					}
 98 | 				}
 99 | 				
100 | 				System.out.println("C"+index+":的点为："+p.getX()+","+p.getY());
101 | 				list.get(index).add(p);
102 | 				
103 | 			}
104 | 			//重新计算簇心,并返回移动的距离，最小的那个距离
105 | 			
106 | 			move=CalCentroid();
107 | 			System.out.println("各个簇心移动中最小的距离为，move="+move);
108 | 		}
109 | 	}
110 | 	
111 | 	public static void Kluster(){
112 | 		
113 | 		for(int k=0;k<K;k++){
114 | 			Vector<Point> vect=new Vector<Point>();
115 | 			Point p=new Point();
116 | 			p=li.get(k);
117 | 			vect.add(p);
118 | 			list.add(vect);
119 | 		}
120 | 		System.out.println("第1次迭代");
121 | 		//默认每一个list里的Vector第0个元素是质心
122 | 		for(int i=K;i<li.size();i++){
123 | 			Point p=new Point();
124 | 			 p=li.get(i);
125 | 			int index = -1;
126 | 			
127 |             double neardist = Double.MAX_VALUE;
128 | 			for(int k=0;k<K;k++){
129 | 				Point centre=list.get(k).get(0);
130 | 				double currentdist=DistanceMeasure(p,centre);
131 | 				if(currentdist<neardist){
132 | 					neardist=currentdist;
133 | 					index=k;
134 | 				}
135 | 			}
136 | 			
137 | 			System.out.println("C"+index+":的点为："+p.getX()+","+p.getY());
138 | 			list.get(index).add(p);
139 | 			
140 | 		}
141 | 		
142 | 	}
143 | 	
144 | 	public static void main(String[] args) throws IOException {
145 | 		// TODO Auto-generated method stub
146 | 		//读取数据
147 | 		readF1();
148 | 		//第一次迭代
149 | 		Kluster();
150 | 		//第一次迭代后计算簇心
151 | 		CalCentroid();
152 | 		//不断迭代，直到收敛
153 | 		RecursionKluster();
154 | 	}
155 | 
156 | }
157 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/clustering/sequence/machinelearning/clustering/kmeans/MyKmeansForUser.java:
--------------------------------------------------------------------------------
  1 | package sequence.machinelearning.clustering.kmeans;
  2 | import java.io.BufferedReader;
  3 | import java.io.FileInputStream;
  4 | import java.io.IOException;
  5 | import java.io.InputStreamReader;
  6 | import java.util.ArrayList;
  7 | import java.util.List;
  8 | import java.util.Vector;
  9 | 
 10 | import sequence.machinelearning.clustering.canopy.UserPoint;
 11 | 
 12 | 
 13 | 
 14 | public class MyKmeansForUser {
 15 | 
 16 | 	static Vector<UserPoint>  li=new Vector<UserPoint>();
 17 | 	//static List<UserPoint>  li=new ArrayList<UserPoint>();
 18 | 	static List<Vector<UserPoint>> list=new ArrayList<Vector<UserPoint>>(); //每次迭代保存结果，一个vector代表一个簇
 19 | 	private final static Integer K=3; //选K=2，也就是估算有两个簇。
 20 | 	private final static Double converge=0.01; //当距离小于某个值的时候，就认为聚类已经聚类了，不需要再迭代，这里的值选0.001	
 21 | 	
 22 | 	//读取数据
 23 | 	public static final void readF1() throws IOException {      
 24 | 		String filePath="datafile/cluster/data.csv";
 25 | 		BufferedReader br = new BufferedReader(new InputStreamReader(
 26 |         new FileInputStream(filePath)));
 27 |         for (String line = br.readLine(); line != null; line = br.readLine()) {
 28 |             if(line.length()==0||"".equals(line))continue;
 29 |         	String[] str=line.split(",");               
 30 |             UserPoint p0=new UserPoint();
 31 |             p0.setUserid(str[0]);
 32 |     		p0.setX(Double.valueOf(str[1]));
 33 |     		p0.setY(Double.valueOf(str[2]));
 34 |     		li.add(p0);
 35 |             //System.out.println(line);               
 36 |         }
 37 |         br.close();
 38 |     }
 39 | 	  //math.sqrt(double n)
 40 |     //扩展下，如果要给m开n次方就用java.lang.StrictMath.pow(m,1.0/n);
 41 | 	//采用欧氏距离
 42 | 	public static  Double DistanceMeasure(UserPoint p1,UserPoint p2){
 43 | 		
 44 | 		Double tmp=StrictMath.pow(p2.getX()-p1.getX(), 2)+StrictMath.pow(p2.getY()-p1.getY(), 2);
 45 | 		return Math.sqrt(tmp);
 46 | 	}
 47 | 	
 48 | 	//计算新的簇心
 49 | 	public static Double CalCentroid(){
 50 | 		System.out.println("------------------------------------------------");
 51 | 		Double movedist=Double.MAX_VALUE;
 52 | 		for(int i=0;i<list.size();i++){
 53 | 			Vector<UserPoint> subli=list.get(i);
 54 | 			UserPoint po=new UserPoint();
 55 | 			Double sumX=0.0;
 56 | 			Double sumY=0.0;
 57 | 			Double Clusterlen=Double.valueOf(subli.size());
 58 | 			for(int j=0;j<Clusterlen;j++){
 59 | 				UserPoint nextp=subli.get(j);
 60 | 				sumX=sumX+nextp.getX();
 61 | 				sumY=sumY+nextp.getY();
 62 | 			}
 63 | 			po.setX(sumX/Clusterlen);
 64 | 			po.setY(sumY/Clusterlen);
 65 | 			//新的点与旧点之间的距离
 66 | 			Double dist=DistanceMeasure(subli.get(0),po);
 67 | 			//在多个簇心移动的过程中，返回移动距离最小的值
 68 | 			if(dist<movedist)movedist=dist;
 69 | 			list.get(i).clear();
 70 | 			list.get(i).add(po);
 71 | 			System.out.println("C"+i+"的簇心为："+po.getX()+","+po.getY()+"用户ID："+po.getUserid());
 72 | 		}
 73 | 		String test="ll";
 74 | 		return movedist;
 75 | 	}
 76 | 	//本次的簇心
 77 | 	//下一次移动的簇心
 78 | 	
 79 | 	private static Double move=Double.MAX_VALUE;//移动距离
 80 | 	//不断地迭代，直到收敛
 81 | 	public static void RecursionKluster(){
 82 | 		for(int times=2;move>converge;times++){
 83 | 			System.out.println("第"+times+"次迭代");
 84 | 			//默认每一个list里的Vector第0个元素是质心
 85 | 			for(int i=0;i<li.size();i++){
 86 | 				UserPoint p=new UserPoint();
 87 | 				 p=li.get(i);
 88 | 				int index = -1;
 89 | 				
 90 | 	            double neardist = Double.MAX_VALUE;
 91 | 				for(int k=0;k<K;k++){
 92 | 					UserPoint centre=list.get(k).get(0);
 93 | 					double currentdist=DistanceMeasure(p,centre);
 94 | 					if(currentdist<neardist){
 95 | 						neardist=currentdist;
 96 | 						index=k;
 97 | 					}
 98 | 				}
 99 | 				
100 | 				System.out.println("C"+index+":的点为："+p.getX()+","+p.getY()+"----用户ID："+p.getUserid());
101 | 				list.get(index).add(p);
102 | 				
103 | 			}
104 | 			//重新计算簇心,并返回移动的距离，最小的那个距离
105 | 			
106 | 			move=CalCentroid();
107 | 			System.out.println("本次簇心移动距离为，move="+move);
108 | 		}
109 | 	}
110 | 	
111 | 	public static void Kluster(){
112 | 		
113 | 		for(int k=0;k<K;k++){
114 | 			Vector<UserPoint> vect=new Vector<UserPoint>();
115 | 			UserPoint p=new UserPoint();
116 | 			p=li.get(k);
117 | 			vect.add(p);
118 | 			list.add(vect);
119 | 		}
120 | 		System.out.println("第1次迭代");
121 | 		//默认每一个list里的Vector第0个元素是质心
122 | 		for(int i=K;i<li.size();i++){
123 | 			UserPoint p=new UserPoint();
124 | 			 p=li.get(i);
125 | 			int index = -1;
126 | 			
127 |             double neardist = Double.MAX_VALUE;
128 | 			for(int k=0;k<K;k++){
129 | 				UserPoint centre=list.get(k).get(0);
130 | 				double currentdist=DistanceMeasure(p,centre);
131 | 				if(currentdist<neardist){
132 | 					neardist=currentdist;
133 | 					index=k;
134 | 				}
135 | 			}
136 | 			
137 | 			System.out.println("C"+index+":的点为："+p.getX()+","+p.getY()+"----用户ID："+p.getUserid());
138 | 			list.get(index).add(p);
139 | 			
140 | 		}
141 | 		
142 | 	}
143 | 	
144 | 	public static void main(String[] args) throws IOException {
145 | 		// TODO Auto-generated method stub
146 | 		//读取数据
147 | 		readF1();
148 | 		//第一次迭代
149 | 		Kluster();
150 | 		//第一次迭代后计算簇心
151 | 		CalCentroid();
152 | 		//不断迭代，直到收敛
153 | 		RecursionKluster();
154 | 	}
155 | 
156 | }
157 | 
158 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/clustering/sequence/machinelearning/clustering/kmeans/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.clustering.kmeans;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/c45/DecisionTreeNode.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.c45;
 2 | public class DecisionTreeNode {  
 3 | 
 4 |     DecisionTreeNode parentNode;  
 5 | 
 6 |     String parentArrtibute;  
 7 | 
 8 |     String nodeName;  
 9 | 
10 |     String[] arrtibutesArray;  
11 | 
12 |     DecisionTreeNode[] childNodesArray;  
13 | 
14 | }  
15 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/c45/DecisionTreeUtil.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.c45;
 2 | 
 3 | public class DecisionTreeUtil {  
 4 | 
 5 | 	/** 
 6 | 	     * entropy:Info(T)=(i=1...k)pi*log（2）pi 
 7 | 	     *  
 8 | 	     * @param x 
 9 | 	     * @param total 
10 | 	     * @return double 
11 | 	     */ 
12 | 	public static double info(int x, int total) {   
13 | 	if (x == 0) {   
14 | 	return 0;   
15 | 	        }   
16 | 	double x_pi = getPi(x, total);   
17 | 	return -(x_pi * logYBase2(x_pi));   
18 | 	    }  
19 | 
20 | 	/** 
21 | 	     * log2y 
22 | 	     *  
23 | 	     * @param y 
24 | 	     * @return double 
25 | 	     */ 
26 | 	public static double logYBase2(double y) {   
27 | 	return Math.log(y) / Math.log(2);   
28 | 	    }  
29 | 
30 | 	/** 
31 | 	     * pi=|C(i,d)|/|D| 
32 | 	     *  
33 | 	     * @param x 
34 | 	     * @param total 
35 | 	     * @return double 
36 | 	     */ 
37 | 	public static double getPi(int x, int total) {   
38 | 	return x / (double) total;   
39 | 	    }  
40 | 
41 | 	}  
42 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/c45/SequenceComparator.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.c45;
 2 | 
 3 | import java.util.Comparator;
 4 | 
 5 | public class SequenceComparator implements Comparator {  
 6 | 
 7 | 	public int compare(Object o1, Object o2) throws ClassCastException {   
 8 | 	        String str1 = (String) o1;   
 9 | 	        String str2 = (String) o2;   
10 | 	return str1.compareTo(str2);   
11 | 	    }   
12 | 	} 
13 | 
14 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/c45/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.decisiontree.c45;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/id3/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.decisiontree.id3;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/id3test/DTreeUtil.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.id3test;
 2 | 
 3 | public class DTreeUtil {
 4 | 	
 5 | 	public static double sigma(int x, int total)
 6 | 	{
 7 | 		if (x == 0)
 8 | 		{
 9 | 			return 0;
10 | 		}
11 | 		double x_pi = getPi(x,total);
12 | 		return -(x_pi*logYBase2(x_pi));
13 | 	}
14 | 
15 | 	
16 | 	public static double logYBase2(double y)
17 | 	{
18 | 		return Math.log(y) / Math.log(2);
19 | 	}
20 | 	
21 | 	
22 | 	public static double getPi(int x, int total)
23 | 	{
24 | 		return x * Double.parseDouble("1.0") / total;
25 | 	}
26 | }
27 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/id3test/SequenceComparator.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.id3test;
 2 | import java.util.Comparator;;
 3 | public class SequenceComparator implements Comparator 
 4 | {
 5 | 	public int compare(Object o1, Object o2) throws ClassCastException{
 6 | 		String str1 = (String)o1;
 7 | 		String str2 = (String)o2;
 8 | 		return str1.compareTo(str2);
 9 | 	}
10 | }
11 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/id3test/TreeNode.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.id3test;
 2 | 
 3 | public class TreeNode {
 4 | 	//父节点
 5 | 	TreeNode parent;
 6 | 	
 7 | 	//指向父的哪个属性
 8 | 	String parentAttribute;
 9 | 	
10 | 	//节点名
11 | 	String nodeName;
12 | 	
13 | 	//属性数组
14 | 	String[] attributes;
15 | 	
16 | 	//节点数组
17 | 	TreeNode[] childNodes;
18 | 	
19 | 	//可信度
20 | 	double percent;
21 | }
22 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/id3test/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.decisiontree.id3test;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/myc45/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Jamas
6 |  *
7 |  */
8 | package sequence.machinelearning.decisiontree.myc45;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/myid3/Maxgain.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.myid3;
 2 | 
 3 | public class Maxgain {
 4 | 	private Double maxgain;
 5 | 	private Integer maxindex;
 6 | 	public Double getMaxgain() {
 7 | 		return maxgain;
 8 | 	}
 9 | 	public void setMaxgain(Double maxgain) {
10 | 		this.maxgain = maxgain;
11 | 	}
12 | 	public Integer getMaxindex() {
13 | 		return maxindex;
14 | 	}
15 | 	public void setMaxindex(Integer maxindex) {
16 | 		this.maxindex = maxindex;
17 | 	}
18 | }
19 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/myid3/Point.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.myid3;
 2 | 
 3 | public class Point {
 4 | 
 5 | 	private Double sv;
 6 | 	private Double entropySv;
 7 | 	public Double getSv() {
 8 | 		return sv;
 9 | 	}
10 | 	public void setSv(Double sv) {
11 | 		this.sv = sv;
12 | 	}
13 | 	public Double getEntropySv() {
14 | 		return entropySv;
15 | 	}
16 | 	public void setEntropySv(Double entropySv) {
17 | 		this.entropySv = entropySv;
18 | 	}
19 | 	
20 | 	
21 | }
22 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/myid3/TheMath.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.myid3;
 2 | 
 3 | import java.util.Iterator;
 4 | import java.util.List;
 5 | import java.util.Map;
 6 | import java.util.Map.Entry;
 7 | 
 8 | public class TheMath {
 9 | 	
10 | 	//信息熵的计算公式，这里仅是离散型二项分布的熵计算
11 | 	/**
12 | 	 * 
13 | 	 * @param S 样本总数
14 | 	 * @param li 各个事件发生的次数
15 | 	 * @return
16 | 	 */
17 | 	public static  Double getEntropy(Integer S,List<Double> li){
18 | 		
19 | 		Double entropy=new Double(0.0);
20 | 		for(int i=0;i<li.size();i++){
21 | 			entropy=entropy+sigma(li.get(i),Double.valueOf(S));
22 | 		}
23 | 		return entropy;
24 | 	}
25 | 	//信息增益公式
26 | 	/**
27 | 	 * @param 此公式完全参照  《机器学习(Tom.Mitchell著)》3.4节
28 | 	 * @param entropyS S的信息熵
29 | 	 * @param S 传入的总数
30 | 	 * @param lasv有两个参数， sv 是sv的个数,sv.entropysv是entropy(sv)
31 | 	 * @return 返回信息增益
32 | 	 */
33 | 	
34 | 	public static Double getGain(Double entropyS,int S,List<Point> lasv){
35 | 		Double gain=new Double(0.0);
36 | 		Double enSum=new Double(0.0);
37 | 		Map.Entry<Double, Double>entry;
38 | 		for(int i=0;i<lasv.size();i++){
39 | 			Point p=lasv.get(i);
40 | 			enSum=enSum+((p.getSv()/Double.valueOf(S))*p.getEntropySv());
41 | 		}
42 | 		
43 | 		gain=entropyS-enSum;
44 | 		return gain;
45 | 	}
46 | 	//公式 -pi*log2(x)
47 | 	public static Double sigma(Double x, Double total)
48 | 	{
49 | 		if (x == 0)
50 | 		{
51 | 			return 0.0;
52 | 		}
53 | 		double x_pi = getProbability(x,total);
54 | 		return -(x_pi*logYBase2(x_pi));
55 | 	}
56 | 
57 | 	//取2为底的对数
58 | 	public static double logYBase2(double y)
59 | 	{
60 | 		return Math.log(y) / Math.log(2);
61 | 	}
62 | 	
63 | 	//等可能事件概率
64 | 	public static double getProbability(double x, double total)
65 | 	{
66 | 		return x * Double.parseDouble("1.0") / total;
67 | 	}
68 | }
69 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/myid3/TreeNode.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.decisiontree.myid3;
 2 | 
 3 | import java.util.ArrayList;
 4 | import java.util.List;
 5 | 
 6 | public class TreeNode {
 7 | 	private String name; // 节点名称
 8 | 	private TreeNode parent; // 父节点
 9 | 	private List<TreeNode> children; // 子节点
10 | 	private String fatherAttribute; // 此节点是父类的哪具属性的分支
11 | 	//可信度
12 | 	private Double percent;
13 | 	
14 | 	//属性数组
15 | 	private ArrayList<String> liatts;
16 | 	
17 |    
18 | 	public ArrayList<String> getLiatts() {
19 | 		return liatts;
20 | 	}
21 | 	public void setLiatts(ArrayList<String> liatts) {
22 | 		this.liatts = liatts;
23 | 	}
24 | 	public String getName() {
25 | 		return name;
26 | 	}
27 | 	public void setName(String name) {
28 | 		this.name = name;
29 | 	}
30 | 	public TreeNode getParent() {
31 | 		return parent;
32 | 	}
33 | 	public void setParent(TreeNode parent) {
34 | 		this.parent = parent;
35 | 	}
36 | 	public List<TreeNode> getChildren() {
37 | 		return children;
38 | 	}
39 | 	public void setChildren(List<TreeNode> children) {
40 | 		this.children = children;
41 | 	}
42 | 	
43 | 	public String getFatherAttribute() {
44 | 		return fatherAttribute;
45 | 	}
46 | 	public void setFatherAttribute(String fatherAttribute) {
47 | 		this.fatherAttribute = fatherAttribute;
48 | 	}
49 | 	public Double getPercent() {
50 | 		return percent;
51 | 	}
52 | 	public void setPercent(Double percent) {
53 | 		this.percent = percent;
54 | 	}
55 | 	/**
56 |      * 添加一个节点
57 |      * @param child
58 |      */
59 |     public void addChild(TreeNode child) {
60 |         if (this.getChildren() == null) {
61 |             List<TreeNode> list = new ArrayList<TreeNode>();
62 |             list.add(child);
63 |             this.setChildren(list);
64 |         } else {
65 |             this.getChildren().add(child);
66 |         }
67 |     }
68 |     /**
69 |     *  是否存在着该节点,存在返回该节点，不存在返回空
70 |     * @param name
71 |     * @return
72 |     */
73 |     public TreeNode findChild(String name) {
74 |         List<TreeNode> children = this.getChildren();
75 |         if (children != null) {
76 |             for (TreeNode child : children) {
77 |                 if (child.getName().equals(name)) {
78 |                     return child;
79 |                 }
80 |             }
81 |         }
82 |         return null;
83 |     }
84 | 	
85 | }
86 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/decisiontree/sequence/machinelearning/decisiontree/myid3/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package sequence.machinelearning.decisiontree.myid3;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/naivebayes/sequence/machinelearning/naivebayes/bayesdemo/Main.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.naivebayes.bayesdemo;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.File;
 5 | import java.io.FileInputStream;
 6 | import java.io.FileOutputStream;
 7 | import java.io.FileWriter;
 8 | import java.io.IOException;
 9 | import java.io.InputStreamReader;
10 | 
11 | public class Main {
12 | 
13 | 	public static void main(String[] args) throws IOException {
14 | 		// TODO Auto-generated method stub
15 | 		Main m=new Main();
16 | 		m.stringBufferDemo();
17 | 		//m.fileWriter("D:/test.txt");
18 | 		m.readF1();
19 | 	}
20 | 	
21 | 	public void fileWriter(String fileName) throws IOException{
22 |         //创建一个FileWriter对象
23 |         FileWriter fw = new FileWriter(fileName);
24 |         //遍历clist集合写入到fileName中
25 |         for (int i=0;i<10;i++){
26 |             fw.write("第"+i+"行----");
27 |             fw.write("\n");
28 |         }
29 |         //刷新缓冲区
30 |         fw.flush();
31 |         //关闭文件流对象
32 |         fw.close();
33 |     }
34 | 
35 | 	
36 | 	
37 | 	/**
38 |     * 利用StringBuffer写文件
39 |     * 该方法可以设定使用何种编码，有效解决中文问题。
40 |     * @throws IOException
41 |     */
42 |    
43 |    public void stringBufferDemo() throws IOException
44 |    {
45 |        String src="datafile/naivebayes/train/out/result.arff";
46 |        delfile(src);
47 |        File file=new File(src);
48 |        if(file.exists())
49 |            file.createNewFile();
50 |        FileOutputStream out=new FileOutputStream(file,true);
51 |        for(int i=0;i<10;i++)
52 |        {
53 |            StringBuffer sb=new StringBuffer();
54 |            sb.append("这是第"+i+"行 \n");//如果不加"/n"则不能实现换行。
55 |            System.out.print(sb.toString());
56 |            
57 |            out.write(sb.toString().getBytes("utf-8"));
58 |        }
59 |        out.close();
60 |    }
61 |    public void delfile(String filepath){
62 | 	   File file=new File(filepath);   
63 | 	       if(file.exists())   
64 | 	      {   
65 | 	           //file.createNewFile(); 
66 | 			   file.delete();   
67 | 	       }    
68 | 
69 |    }
70 | 	public void readF1() throws IOException {      
71 | 		
72 | 		//String filePath="scripts/clustering/canopy/canopy.dat";
73 | 		String filePath="datafile/naivebayes/train/out/result";
74 | 		BufferedReader br = new BufferedReader(new InputStreamReader(
75 |        new FileInputStream(filePath)));
76 |        for (String line = br.readLine(); line != null; line = br.readLine()) {
77 |            if(line.length()==0||"".equals(line))continue;
78 |        	String[] str=line.split(",");   
79 |        	
80 |        	   
81 |        }
82 |        br.close();
83 |        
84 |    }
85 | 
86 | 
87 | }
88 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/naivebayes/sequence/machinelearning/naivebayes/bayesdemo/Test.java:
--------------------------------------------------------------------------------
  1 | package sequence.machinelearning.naivebayes.bayesdemo;
  2 | 
  3 | import java.io.BufferedReader;
  4 | import java.io.File;
  5 | import java.io.FileReader;
  6 | import java.io.IOException;
  7 | import java.math.BigDecimal;
  8 | import java.util.ArrayList;
  9 | import java.util.HashMap;
 10 | import java.util.Map;
 11 | import java.util.regex.Matcher;
 12 | import java.util.regex.Pattern;
 13 | 
 14 | public class Test {
 15 | 
 16 | 	private static Map<String,Double> cmap=new HashMap<String,Double>();
 17 | 	private static Map<String,Double> pmap=new HashMap<String,Double>();
 18 |     public static final String patternString = "@decision(.*)[{](.*?)[}]";
 19 | 	public BigDecimal getProbability(String[] line,String decision){
 20 | 		
 21 | 		String ckey="P("+decision+")";
 22 | 		//获取P(yes)的概率
 23 | 		BigDecimal result=new BigDecimal(cmap.get(ckey));
 24 | 			for(int j=0;j<line.length;j++){
 25 | 				String attval=line[j].toString();
 26 | 				String pkey="P("+Train.lisatt.get(j)+"="+attval+"|"+decision+")";
 27 | 				//取得P(outlook=sunny|yes)的概率相
 28 | 				BigDecimal pi=new BigDecimal(pmap.get(pkey));
 29 | 				result=result.multiply(pi);
 30 | 			}
 31 | 		//System.out.println(arraytoString(line)+" 为"+decision+"的参考数值是："+result.toString().substring(0,5));
 32 | 		return result;
 33 | 	}
 34 | 	public void printResult(){
 35 | 		for(int i=0;i<Train.listdata.size();i++){
 36 | 			String[] line=Train.listdata.get(i);
 37 | 			BigDecimal p=new BigDecimal(0);
 38 | 			int index=-1;
 39 | 			for(int j=0;j<Train.sort.size();j++){
 40 | 				BigDecimal pnext=getProbability(line,Train.sort.get(j));
 41 | 				if(p.compareTo(pnext)==-1){
 42 | 					p=pnext;
 43 | 					index=j;
 44 | 				}
 45 | 			}
 46 | 			System.out.println(arraytoString(line)+"   判断的结果是："+Train.sort.get(index)+"	      --参考数值是："+p.toString().substring(0,5));
 47 | 		}
 48 | 	}
 49 | 	
 50 | 	public static void main(String[] args) {
 51 | 		// TODO Auto-generated method stub
 52 | 		Train train=new Train();
 53 | 		//读取测试集
 54 | 		train.readARFF(new File("datafile/naivebayes/test/in/test.arff"));
 55 | 		Test test=new Test();
 56 | 		//读取训练结果
 57 | 		test.readResult(new File("datafile/naivebayes/train/out/trainresult.arff"));
 58 | 		test.printResult();
 59 | 	}
 60 | 	//数组转字符串
 61 | 	public String arraytoString(String[] line){
 62 | 		
 63 | 		StringBuffer sb = new StringBuffer();
 64 | 		for(int i = 0; i < line.length; i++){
 65 | 		 sb. append(line[i]+",");
 66 | 		}
 67 |         String newStr = sb.toString();
 68 |         return newStr.substring(0, newStr.length()-1);
 69 | 	}
 70 |     //读取arff文件，给attribute、attributevalue、data赋值
 71 |     public void readResult(File file) {
 72 |         try {
 73 |             FileReader fr = new FileReader(file);
 74 |             BufferedReader br = new BufferedReader(fr);
 75 |             String line;
 76 |             Pattern pattern = Pattern.compile(patternString);
 77 |             while ((line = br.readLine()) != null) {
 78 |             	Matcher matcher = pattern.matcher(line);
 79 |                 if (matcher.find()) {
 80 |                 	String[] values = matcher.group(2).split(",");
 81 |                     Double val=Double.valueOf(values[0]);
 82 |                     cmap.put(matcher.group(1).trim(), val);
 83 |                 } else if (line.startsWith("@data")) {
 84 |                     while ((line = br.readLine()) != null) {
 85 |                         if(line=="")
 86 |                             continue;
 87 |                         String[] row = line.split(",");
 88 |                         Double val=Double.valueOf(row[1]);
 89 |                         pmap.put(row[0], val);
 90 |                     }
 91 |                 } else {
 92 |                     continue;
 93 |                 }
 94 |             }
 95 |             br.close();
 96 |         } catch (IOException e1) {
 97 |             e1.printStackTrace();
 98 |         }
 99 |     }
100 | 
101 | }
102 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/naivebayes/sequence/machinelearning/naivebayes/bayesdemo/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Jamas
6 |  *
7 |  */
8 | package sequence.machinelearning.naivebayes.bayesdemo;


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/naivebayes/sequence/machinelearning/naivebayes/textmining/ParticipleTest.java:
--------------------------------------------------------------------------------
 1 | package sequence.machinelearning.naivebayes.textmining;
 2 | 
 3 | import java.io.IOException;
 4 | 
 5 | import jeasy.analysis.MMAnalyzer;
 6 | 
 7 | /**
 8 |  * 极易分词测试
 9 |  * @author Administrator
10 |  *
11 |  */
12 | public class ParticipleTest {
13 | 
14 | 	public static void main(String[] args)      
15 | 	{      
16 | 	String text = "据路透社报道，印度尼西亚社会事务部一官员星期二(29日)表示，"      
17 | 	+ "日惹市附近当地时间27日晨5时53分发生的里氏6.2级地震已经造成至少5427人死亡，"      
18 | 	+ "20000余人受伤，近20万人无家可归。";      
19 | 	    
20 | 	MMAnalyzer analyzer = new MMAnalyzer();      
21 | 	try      
22 | 	{      
23 | 	//System.out.println(analyzer.segment(text, " | "));     
24 | 		System.out.println(analyzer.segment(text, "\n"));  
25 | 	}      
26 | 	catch (IOException e)      
27 | 	{      
28 | 	e.printStackTrace();      
29 | 	}      
30 | 	} 
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/com.homework/src/sequence/machinelearning/naivebayes/sequence/machinelearning/naivebayes/textmining/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Jamas
6 |  *
7 |  */
8 | package sequence.machinelearning.naivebayes.textmining;


--------------------------------------------------------------------------------
/com.homework/src/test/java/com/homework/AppTest.java:
--------------------------------------------------------------------------------
 1 | package com.homework;
 2 | 
 3 | import junit.framework.Test;
 4 | import junit.framework.TestCase;
 5 | import junit.framework.TestSuite;
 6 | 
 7 | /**
 8 |  * Unit test for simple App.
 9 |  */
10 | public class AppTest 
11 |     extends TestCase
12 | {
13 |     /**
14 |      * Create the test case
15 |      *
16 |      * @param testName name of the test case
17 |      */
18 |     public AppTest( String testName )
19 |     {
20 |         super( testName );
21 |     }
22 | 
23 |     /**
24 |      * @return the suite of tests being tested
25 |      */
26 |     public static Test suite()
27 |     {
28 |         return new TestSuite( AppTest.class );
29 |     }
30 | 
31 |     /**
32 |      * Rigourous Test :-)
33 |      */
34 |     public void testApp()
35 |     {
36 |         assertTrue( true );
37 |     }
38 | }
39 | 


--------------------------------------------------------------------------------
/com.homework/src/week2/business/DayIp.java:
--------------------------------------------------------------------------------
 1 | package business;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.Iterator;
 5 | 
 6 | import org.apache.hadoop.fs.Path;
 7 | import org.apache.hadoop.io.IntWritable;
 8 | import org.apache.hadoop.io.Text;
 9 | import org.apache.hadoop.mapred.FileInputFormat;
10 | import org.apache.hadoop.mapred.FileOutputFormat;
11 | 
12 | import org.apache.hadoop.mapred.JobClient;
13 | import org.apache.hadoop.mapred.JobConf;
14 | import org.apache.hadoop.mapred.MapReduceBase;
15 | import org.apache.hadoop.mapred.Mapper;
16 | import org.apache.hadoop.mapred.OutputCollector;
17 | import org.apache.hadoop.mapred.Reducer;
18 | import org.apache.hadoop.mapred.Reporter;
19 | import org.apache.hadoop.mapred.TextInputFormat;
20 | import org.apache.hadoop.mapred.TextOutputFormat;
21 | 
22 | 
23 | import entity.Kpi;
24 | 
25 | public class DayIp {
26 |      public static class IpMapper extends MapReduceBase implements Mapper<Object,Text,Text,IntWritable>{
27 |         private final static  IntWritable one=new IntWritable(1);
28 | 		Text ip=new Text();
29 | 		@Override
30 | 		public void map(Object key, Text value,OutputCollector<Text, IntWritable> output, Reporter reporter)throws IOException {
31 | 			// TODO Auto-generated method stub
32 | 			Kpi kpi=new Kpi();
33 | 			kpi=Kpi.filterIPs(value.toString());
34 | 			if(kpi.isValid()==true){
35 | 			ip.set(kpi.getRemote_addr());
36 | 			output.collect(ip, one);
37 | 			}
38 | 		}
39 | }
40 | 	public static class IpReducer extends MapReduceBase implements Reducer<Text,IntWritable,Text,IntWritable>{
41 |         private  IntWritable sumresult=new IntWritable(0);
42 | 		//private final static IntWritable one =new IntWritable(1);
43 | 		private  int sum=0;
44 | 		
45 | 		@Override
46 | 		public void reduce(Text key, Iterator<IntWritable> values,OutputCollector<Text, IntWritable> output, Reporter reporter)throws IOException {
47 | 			// TODO Auto-generated method stub
48 | 			sum=sum+1;
49 | 			
50 | 			sumresult.set(sum);
51 | 			System.out.print(key+"is:"+sumresult);
52 | 			output.collect(key, sumresult);
53 | 		}
54 | 	}
55 | 	/**
56 | 	 * @param args
57 | 	 */
58 | 	public static void main(String[] args) throws Exception{
59 | 		// TODO Auto-generated method stub
60 |          String inpath="hdfs://10.6.3.200:9000/user/hdfs/in/";
61 |          String outpath="hdfs://10.6.3.200:9000/user/hdfs/ip_out/";
62 |          
63 |          JobConf conf=new JobConf(DayIp.class);
64 |          conf.setJobName("depend ip count is:");
65 |          
66 |          conf.setMapOutputKeyClass(Text.class);
67 |          conf.setMapOutputValueClass(IntWritable.class);
68 |          
69 |          conf.setOutputKeyClass(Text.class);
70 |          conf.setOutputValueClass(IntWritable.class);
71 |          
72 |          conf.setMapperClass(IpMapper.class);
73 |          conf.setReducerClass(IpReducer.class);
74 |          conf.setCombinerClass(IpReducer.class);
75 |          
76 |          conf.setInputFormat(TextInputFormat.class);
77 |          conf.setOutputFormat(TextOutputFormat.class);
78 |          
79 |          FileInputFormat.setInputPaths(conf, new Path(inpath));
80 |          FileOutputFormat.setOutputPath(conf,new Path(outpath));
81 |          
82 |          JobClient.runJob(conf);
83 |          System.out.println("finish");
84 |          System.exit(0);
85 |          
86 | 	}
87 | 
88 | }
89 | 


--------------------------------------------------------------------------------
/com.homework/src/week2/business/StatPV.java:
--------------------------------------------------------------------------------
 1 | package business;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.Iterator;
 5 | 
 6 | import org.apache.hadoop.fs.Path;
 7 | import org.apache.hadoop.io.IntWritable;
 8 | import org.apache.hadoop.io.Text;
 9 | import org.apache.hadoop.mapred.FileInputFormat;
10 | import org.apache.hadoop.mapred.FileOutputFormat;
11 | import org.apache.hadoop.mapred.JobClient;
12 | import org.apache.hadoop.mapred.JobConf;
13 | import org.apache.hadoop.mapred.MapReduceBase;
14 | import org.apache.hadoop.mapred.Mapper;
15 | import org.apache.hadoop.mapred.OutputCollector;
16 | import org.apache.hadoop.mapred.Reducer;
17 | import org.apache.hadoop.mapred.Reporter;
18 | import org.apache.hadoop.mapred.TextInputFormat;
19 | import org.apache.hadoop.mapred.TextOutputFormat;
20 | 
21 | import entity.Kpi;
22 | 
23 | public class StatPV {
24 | 
25 | 	private static class PvMapper extends MapReduceBase implements Mapper<Object,Text,Text,IntWritable>{
26 | 
27 | 		private IntWritable one=new IntWritable(1);
28 | 		private Text pvtxt=new Text();
29 | 		@Override
30 | 		public void map(Object key, Text value,OutputCollector<Text, IntWritable> output, Reporter reporter)throws IOException {
31 | 			// TODO Auto-generated method stub
32 | 			try {
33 | 				Kpi kpi=Kpi.filterPVs(value.toString());
34 | 				pvtxt.set("pv");
35 | 				output.collect(pvtxt, one);
36 | 			} catch (Exception e) {
37 | 				// TODO Auto-generated catch block
38 | 				e.printStackTrace();
39 | 			}
40 | 		}
41 | 		
42 | 	}
43 | 	private static class PvReducer extends MapReduceBase implements Reducer<Text,IntWritable,Text,IntWritable>{
44 | 
45 | 		private IntWritable result=new IntWritable(0);
46 | 		 
47 | 		@Override
48 | 		public void reduce(Text key, Iterator<IntWritable> values,OutputCollector<Text, IntWritable> output, Reporter reporter)throws IOException {
49 | 			// TODO Auto-generated method stub
50 | 			int sum=0;
51 | 			try {
52 | 				while(values.hasNext()){
53 | 					
54 | 					sum=sum+ values.next().get();
55 | 				}
56 | 				result.set(sum);
57 | 				output.collect(key, result);
58 | 			} catch (Exception e) {
59 | 				// TODO Auto-generated catch block
60 | 				e.printStackTrace();
61 | 			}
62 | 		}
63 | 	}
64 | 	public static void main(String[] args) throws IOException {
65 |         String inpath="hdfs://localhost:9000/user/hdfs/in/";
66 |         String outpath="hdfs://localhost:9000/user/hdfs/pv_out/";
67 |         JobConf conf=new JobConf(StatPV.class);
68 |         conf.setJobName("StatPV");
69 |         conf.setMapperClass(PvMapper.class);
70 |         conf.setCombinerClass(PvReducer.class);
71 |         conf.setReducerClass(PvReducer.class);
72 |        
73 |         
74 |         conf.setOutputKeyClass(Text.class);
75 |         conf.setOutputValueClass(IntWritable.class);
76 |         
77 |         conf.setMapOutputKeyClass(Text.class);
78 |         conf.setMapOutputValueClass(IntWritable.class);
79 |         
80 |         conf.setInputFormat(TextInputFormat.class);
81 |         conf.setOutputFormat(TextOutputFormat.class);
82 |         
83 |         FileInputFormat.setInputPaths(conf, new Path(inpath));
84 |         FileOutputFormat.setOutputPath(conf, new Path(outpath));
85 |         JobClient.runJob(conf);
86 |         System.out.println("finish");
87 |         System.exit(0);
88 | 	}
89 | 
90 | }
91 | 


--------------------------------------------------------------------------------
/com.homework/src/week2/business/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author hadoop
6 |  *
7 |  */
8 | package business;


--------------------------------------------------------------------------------
/com.homework/src/week2/entity/Kpi.java:
--------------------------------------------------------------------------------
  1 | package entity;
  2 | 
  3 | import java.util.HashSet;
  4 | import java.util.Set;
  5 | 
  6 | 
  7 | 
  8 | 
  9 | 
 10 | public class Kpi {
 11 | 
 12 |     private String remote_addr;// 记录客户端的ip地址
 13 |     private String remote_user;// 记录客户端用户名称,忽略属性"-"
 14 |     private String time_local;// 记录访问时间与时区
 15 |     private String request;// 记录请求的url与http协议
 16 |     private String status;// 记录请求状态；成功是200
 17 |     private String body_bytes_sent;// 记录发送给客户端文件主体内容大小
 18 |     private String http_referer;// 用来记录从那个页面链接访问过来的
 19 |     private String http_user_agent;// 记录客户浏览器的相关信息
 20 | 
 21 |     private boolean valid = true;// 判断数据是否合法
 22 | 
 23 |     private static Kpi parser(String line) {
 24 |         //System.out.println(line);
 25 |         Kpi kpi = new Kpi();
 26 |         try {
 27 | 			String[] arr = line.split(" ");
 28 | 			if (arr.length > 11) {
 29 | 			    kpi.setRemote_addr(arr[0]);
 30 | 			    kpi.setRemote_user(arr[1]);
 31 | 			    kpi.setTime_local(arr[3].substring(1));
 32 | 			    kpi.setRequest(arr[6]);
 33 | 			    kpi.setStatus(arr[8]);
 34 | 			    kpi.setBody_bytes_sent(arr[9]);
 35 | 			    kpi.setHttp_referer(arr[10]);
 36 | 			    
 37 | 			    if (arr.length > 12) {
 38 | 			        kpi.setHttp_user_agent(arr[11] + " " + arr[12]);
 39 | 			    } else {
 40 | 			        kpi.setHttp_user_agent(arr[11]);
 41 | 			    }
 42 | 
 43 | 			    if (Integer.parseInt(kpi.getStatus()) >= 400) {// 大于400，HTTP错误
 44 | 			        kpi.setValid(false);
 45 | 			    }
 46 | 			} else {
 47 | 			    kpi.setValid(false);
 48 | 			}
 49 | 		} catch (NumberFormatException e) {
 50 | 			// TODO Auto-generated catch block
 51 | 			e.printStackTrace();
 52 | 		}
 53 |         
 54 |         
 55 |         return kpi;
 56 |     }
 57 |     public static Kpi filterIPs(String line) {
 58 |         
 59 |     	Kpi kpi=new Kpi();
 60 | 		try {
 61 | 			kpi = parser(line);
 62 | 			int n1=kpi.getRequest().indexOf(".php");
 63 | 			int n2=kpi.getRequest().indexOf(".html");
 64 | 			if((n1+n2)==-2){
 65 | 				kpi.setValid(false);
 66 | 			}
 67 | 		} catch (Exception e) {
 68 | 			// TODO Auto-generated catch block
 69 | 			e.printStackTrace();
 70 | 		}
 71 |         return kpi;
 72 |     }
 73 |     public static Kpi filterPVs(String line) {
 74 |         
 75 |     	Kpi kpi=new Kpi();
 76 | 		try {
 77 | 			kpi = parser(line);
 78 | 			int n1=kpi.getRequest().indexOf(".php");
 79 | 			int n2=kpi.getRequest().indexOf(".html");
 80 | 			if((n1+n2)==-2){
 81 | 				kpi.setValid(false);
 82 | 			}
 83 | 			int n3=kpi.getRequest().indexOf("baidu");
 84 | 			int n4=kpi.getRequest().indexOf("google");
 85 | 			if((n3+n4)!=-2){
 86 | 				kpi.setValid(false);
 87 | 			}
 88 | 		} catch (Exception e) {
 89 | 			// TODO Auto-generated catch block
 90 | 			e.printStackTrace();
 91 | 		}
 92 |         return kpi;
 93 |     }
 94 |     
 95 | 	public String getRemote_addr() {
 96 | 		return remote_addr;
 97 | 	}
 98 | 
 99 | 	public void setRemote_addr(String remote_addr) {
100 | 		this.remote_addr = remote_addr;
101 | 	}
102 | 
103 | 	public String getRemote_user() {
104 | 		return remote_user;
105 | 	}
106 | 
107 | 	public void setRemote_user(String remote_user) {
108 | 		this.remote_user = remote_user;
109 | 	}
110 | 
111 | 	public String getTime_local() {
112 | 		return time_local;
113 | 	}
114 | 
115 | 	public void setTime_local(String time_local) {
116 | 		this.time_local = time_local;
117 | 	}
118 | 
119 | 	public String getRequest() {
120 | 		return request;
121 | 	}
122 | 
123 | 	public void setRequest(String request) {
124 | 		this.request = request;
125 | 	}
126 | 
127 | 	public String getStatus() {
128 | 		return status;
129 | 	}
130 | 
131 | 	public void setStatus(String status) {
132 | 		this.status = status;
133 | 	}
134 | 
135 | 	public String getBody_bytes_sent() {
136 | 		return body_bytes_sent;
137 | 	}
138 | 
139 | 	public void setBody_bytes_sent(String body_bytes_sent) {
140 | 		this.body_bytes_sent = body_bytes_sent;
141 | 	}
142 | 
143 | 	public String getHttp_referer() {
144 | 		return http_referer;
145 | 	}
146 | 
147 | 	public void setHttp_referer(String http_referer) {
148 | 		this.http_referer = http_referer;
149 | 	}
150 | 
151 | 	public String getHttp_user_agent() {
152 | 		return http_user_agent;
153 | 	}
154 | 
155 | 	public void setHttp_user_agent(String http_user_agent) {
156 | 		this.http_user_agent = http_user_agent;
157 | 	}
158 | 
159 | 	public boolean isValid() {
160 | 		return valid;
161 | 	}
162 | 
163 | 	public void setValid(boolean valid) {
164 | 		this.valid = valid;
165 | 	}
166 |     
167 |     
168 | }
169 | 


--------------------------------------------------------------------------------
/com.homework/src/week2/entity/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author hadoop
6 |  *
7 |  */
8 | package entity;


--------------------------------------------------------------------------------
/com.homework/src/week3/mine/Outinfo.java:
--------------------------------------------------------------------------------
 1 | package mine;
 2 | 
 3 | import org.apache.hadoop.io.IntWritable;
 4 | import org.apache.hadoop.io.Text;
 5 | 
 6 | public class Outinfo {
 7 | 
 8 | 	 private Text outkey;
 9 | 	 private Text outvalue;
10 | 	 private boolean outValidate=true;
11 | 	 
12 | 	public Text getOutkey() {
13 | 		return outkey;
14 | 	}
15 | 	public void setOutkey(Text outkey) {
16 | 		this.outkey = outkey;
17 | 	}
18 | 	
19 | 	public Text getOutvalue() {
20 | 		return outvalue;
21 | 	}
22 | 	public void setOutvalue(Text outvalue) {
23 | 		this.outvalue = outvalue;
24 | 	}
25 | 	public boolean isOutValidate() {
26 | 		return outValidate;
27 | 	}
28 | 	public void setOutValidate(boolean outValidate) {
29 | 		this.outValidate = outValidate;
30 | 	} 
31 | 	 
32 | 	 
33 | }
34 | 


--------------------------------------------------------------------------------
/com.homework/src/week3/mine/StationInfo.java:
--------------------------------------------------------------------------------
  1 | package mine;
  2 | 
  3 | import java.text.ParseException;
  4 | import java.text.SimpleDateFormat;
  5 | import java.util.Date;
  6 | 
  7 | import org.apache.hadoop.io.IntWritable;
  8 | import org.apache.hadoop.io.Text;
  9 | 
 10 | 
 11 | 
 12 | public class StationInfo {
 13 | 
 14 | 	 private String imsi;
 15 | 	 private String imei;
 16 | 	 private String updatetype;
 17 | 	 private String local;
 18 | 	 private Date time;
 19 | 	 private String url;
 20 | 	 private Integer type;
 21 | 	 private boolean validate=true;
 22 | 	 
 23 | 	 //type=0表示POS位置信息，1表示NET上网记录
 24 | 	 private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 25 | 	 
 26 | 	 public StationInfo init(String line,int type) throws ParseException{
 27 | 		 
 28 | 		StationInfo info= new StationInfo();
 29 | 		if(line==null||line.trim().length()<=0){
 30 | 			info.setValidate(false);
 31 | 			return info;
 32 | 		}
 33 | 		try {
 34 | 			 
 35 | 			 if(line.length()>0){
 36 | 				 String[] arr=line.split("\t");
 37 | 				 if(arr.length!=5){
 38 | 					 info.setValidate(false);
 39 | 					 return info;
 40 | 				 }
 41 | 				 if(type==0){
 42 | 					 info.setImsi(arr[0]);
 43 | 					 info.setImei(arr[1]);
 44 | 					 info.setUpdatetype(arr[2]);
 45 | 					 info.setLocal(arr[3]);
 46 | 					 info.setTime(this.formatter.parse(arr[4]));
 47 | 				 }else if(type==1){
 48 | 					 info.setImsi(arr[0]);
 49 | 					 info.setImei(arr[1]);
 50 | 					 info.setLocal(arr[2]);
 51 | 					 info.setTime(this.formatter.parse(arr[3]));
 52 | 					 info.setUrl(arr[4]);
 53 | 				 }
 54 | 			 }
 55 | 			 else{
 56 | 				 info.setValidate(false);
 57 | 			 }
 58 | 		} catch (Exception e) {
 59 | 			// TODO Auto-generated catch block
 60 | 			e.printStackTrace();
 61 | 			 info.setValidate(false);
 62 | 			 return info;
 63 | 		}
 64 | 		 return info;
 65 | 		 
 66 | 	 }
 67 | 	 //date表示所要计算的日期
 68 | 	 public Outinfo output(String line,int type,String date,String[] timepoint) throws ParseException{
 69 | 		 
 70 | 		 StationInfo info=new StationInfo();
 71 | 		 Outinfo outinfo=new Outinfo();
 72 | 		 String timeFlag;
 73 | 		 
 74 | 		 try {
 75 | 			info=info.init(line, type);
 76 | 			if(!info.isValidate()){
 77 | 				outinfo.setOutValidate(false);
 78 | 				return outinfo;
 79 | 			}
 80 | 			 String dateValue= formatter.format(info.getTime());
 81 | 			 if(!dateValue.startsWith(date)){
 82 | 				 outinfo.setOutValidate(false);
 83 | 			 }
 84 | 			 
 85 | 				//计算所属时间段
 86 | 				int i = 0, n = timepoint.length;
 87 | 				int hour = Integer.valueOf( dateValue.split(" ")[1].split(":")[0] );
 88 | 				while ( i < n && Integer.valueOf( timepoint[i] ) <= hour )
 89 | 					i++;
 90 | 				if ( i < n )
 91 | 				{
 92 | 					if ( i == 0 )
 93 | 						timeFlag = ( "00-" + timepoint[i] );
 94 | 					else
 95 | 						timeFlag = ( timepoint[i-1] + "-" + timepoint[i] );
 96 | 				}
 97 | 				else 									//Hour大于最大的时间点
 98 | 					timeFlag="unknow";
 99 | 				String outkey=info.getImsi()+"|"+timeFlag;
100 | 			    Text keytext=new Text();
101 | 			    Text valuenum=new Text();
102 | 			    long t=(info.getTime().getTime()/1000l);
103 | 			    		
104 | 			    valuenum.set(info.getLocal()+"|"+String.valueOf(t));
105 | 			    keytext.set(outkey.toString());
106 | 				outinfo.setOutkey(keytext);
107 | 				outinfo.setOutvalue(valuenum);
108 | 		} catch (NumberFormatException e) {
109 | 			// TODO Auto-generated catch block
110 | 			e.printStackTrace();
111 | 			outinfo.setOutValidate(false);
112 | 			return outinfo;
113 | 		}
114 | 			
115 | 		 return outinfo;
116 | 	 }
117 | 	 
118 | 	 
119 | 	 
120 | 	public String getImsi() {
121 | 		return imsi;
122 | 	}
123 | 	public void setImsi(String imsi) {
124 | 		this.imsi = imsi;
125 | 	}
126 | 	public String getImei() {
127 | 		return imei;
128 | 	}
129 | 	public void setImei(String imei) {
130 | 		this.imei = imei;
131 | 	}
132 | 	public String getUpdatetype() {
133 | 		return updatetype;
134 | 	}
135 | 	public void setUpdatetype(String updatetype) {
136 | 		this.updatetype = updatetype;
137 | 	}
138 | 	public String getLocal() {
139 | 		return local;
140 | 	}
141 | 	public void setLocal(String local) {
142 | 		this.local = local;
143 | 	}
144 | 	public Date getTime() {
145 | 		return time;
146 | 	}
147 | 	public void setTime(Date time) {
148 | 		this.time = time;
149 | 	}
150 | 	public String getUrl() {
151 | 		return url;
152 | 	}
153 | 	public void setUrl(String url) {
154 | 		this.url = url;
155 | 	}
156 | 
157 | 
158 | 
159 | 	public boolean isValidate() {
160 | 		return validate;
161 | 	}
162 | 
163 | 
164 | 
165 | 	public void setValidate(boolean validate) {
166 | 		this.validate = validate;
167 | 	}
168 | 
169 | 
170 | 
171 | 	public Integer getType() {
172 | 		return type;
173 | 	}
174 | 
175 | 
176 | 
177 | 	public void setType(Integer type) {
178 | 		this.type = type;
179 | 	}
180 | 	 
181 | 	 
182 | 	
183 | }
184 | 


--------------------------------------------------------------------------------
/com.homework/src/week3/mine/StayTime2改造前备份.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week3/mine/StayTime2改造前备份.rar


--------------------------------------------------------------------------------
/com.homework/src/week3/mine/my.net:
--------------------------------------------------------------------------------
1 | 0000000001	0000000001	10000001	2014-03-19 08:50:00	www.baidu.com
2 | 0000000002	0000000002	20000001	2014-03-19 07:20:00	www.baidu.com
3 | 0000000003	0000000003	30000001	2014-03-19 08:10:00	www.google.com
4 | 
5 | 


--------------------------------------------------------------------------------
/com.homework/src/week3/mine/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author hadoop
6 |  *
7 |  */
8 | package mine;


--------------------------------------------------------------------------------
/com.homework/src/week3/tutorial/TableLine.java:
--------------------------------------------------------------------------------
  1 | /**  
  2 |  * 本包为 Dataguru.cn Hadoop 实战案例课程程序
  3 |  * 编写者：James. 
  4 |  */  
  5 | package tutorial;
  6 | 
  7 | import java.text.ParseException;
  8 | import java.text.SimpleDateFormat;
  9 | import java.util.Date;
 10 | 
 11 | import org.apache.hadoop.io.Text;
 12 | 
 13 | /**
 14 |  * 定义异常类
 15 |  */
 16 | class LineException extends Exception
 17 | {
 18 | 	private static final long serialVersionUID = 8245008693589452584L;
 19 | 	int flag;
 20 | 	public LineException(String msg, int flag)
 21 | 	{
 22 | 		super(msg);
 23 | 		this.flag = flag;
 24 | 	}
 25 | 	public int getFlag()
 26 | 	{
 27 | 		return flag;
 28 | 	}
 29 | }
 30 | 
 31 | 
 32 | /**  
 33 |  * 读取一行数据
 34 |  * 提取所要字段
 35 |  */  
 36 | public class TableLine 
 37 | {
 38 | 	private String imsi, position, time, timeFlag;
 39 | 	private Date day;
 40 | 	private SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
 41 | 	
 42 | 	/**  
 43 | 	 * 初始化并检查该行的合法性
 44 | 	 */  
 45 | 	public void set ( String line, boolean source, String date, String [] timepoint ) throws LineException
 46 | 	{
 47 | 		String [] lineSplit = line.split("\t");
 48 | 		if( source )
 49 | 		{
 50 | 			this.imsi = lineSplit[0];
 51 | 			this.position = lineSplit[3];
 52 | 			this.time = lineSplit[4];
 53 | 		}
 54 | 		else
 55 | 		{
 56 | 			this.imsi = lineSplit[0];
 57 | 			this.position = lineSplit[2];
 58 | 			this.time = lineSplit[3];
 59 | 		}
 60 | 		
 61 | 		//检查日期合法性
 62 | 		if ( ! this.time.startsWith(date) )			//年月日必须与date一致
 63 | 			throw new LineException("", -1);
 64 | 		
 65 | 		try
 66 | 		{
 67 | 			this.day = this.formatter.parse(this.time);
 68 | 		}
 69 | 		catch ( ParseException e )
 70 | 		{
 71 | 			throw new LineException("", 0);
 72 | 		}
 73 | 		
 74 | 		//计算所属时间段
 75 | 		int i = 0, n = timepoint.length;
 76 | 		int hour = Integer.valueOf( this.time.split(" ")[1].split(":")[0] );
 77 | 		while ( i < n && Integer.valueOf( timepoint[i] ) <= hour )
 78 | 			i++;
 79 | 		if ( i < n )
 80 | 		{
 81 | 			if ( i == 0 )
 82 | 				this.timeFlag = ( "00-" + timepoint[i] );
 83 | 			else
 84 | 				this.timeFlag = ( timepoint[i-1] + "-" + timepoint[i] );
 85 | 		}
 86 | 		else 									//Hour大于最大的时间点
 87 | 			throw new LineException("", -1);
 88 | 	}
 89 | 	
 90 | 	/**  
 91 | 	 * 输出KEY
 92 | 	 */  
 93 | 	public Text outKey()
 94 | 	{
 95 | 		return new Text ( this.imsi + "|" + this.timeFlag );
 96 | 	}
 97 | 	
 98 | 	/**  
 99 | 	 * 输出VALUE
100 | 	 */  
101 | 	public Text outValue()
102 | 	{
103 | 		long t = ( day.getTime() / 1000L );						//用时间的偏移量作为输出时间
104 | 		return new Text ( this.position + "|" + String.valueOf(t) );
105 | 	}
106 | }


--------------------------------------------------------------------------------
/com.homework/src/week3/tutorial/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author hadoop
6 |  *
7 |  */
8 | package tutorial;


--------------------------------------------------------------------------------
/com.homework/src/week5/matrix/Multiply.java:
--------------------------------------------------------------------------------
 1 | package matrix;
 2 | 
 3 | public class Multiply {
 4 | 
 5 | 	public static void main(String[] args) {
 6 | 		// TODO Auto-generated method stub
 7 |         int[][] a={{1,0,3,-1},{2,1,0,2}};
 8 |         int[][] b={{4,1,0},{-1,1,3},{2,0,1},{1,3,4}};
 9 |         int[][] c=new int[2][3];
10 |         //int[][] c;
11 |         for(int i=0;i<a.length;i++){
12 |             for(int j=0;j<b[0].length;j++){
13 |             	for(int k=0;k<b.length;k++){
14 |             		c[i][j]=c[i][j]+a[i][k]*b[k][j];
15 |             	}
16 |             }
17 |         }
18 |        
19 |         for(int i=0;i<c.length;i++){
20 |         	for(int j=0;j<c[0].length;j++){
21 |         		System.out.print(c[i][j]+"  ");
22 |         	}
23 |         	System.out.println();
24 |         }
25 |         
26 | 		
27 | 	}
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/com.homework/src/week5/matrix/Recommend.java:
--------------------------------------------------------------------------------
 1 | package matrix;
 2 | 
 3 | import java.util.HashMap;
 4 | import java.util.Map;
 5 | import java.util.regex.Pattern;
 6 | 
 7 | import org.apache.hadoop.mapred.JobConf;
 8 | 
 9 | 
10 | public class Recommend {
11 | 
12 |     public static final String HDFS = "hdfs://192.168.0.200:9000/user/hdfs/";
13 |     public static final Pattern DELIMITER = Pattern.compile("[\t,]");
14 | 
15 |     public static void main(String[] args) throws Exception {
16 |         Map<String, String> path = new HashMap<String, String>();
17 |         //hdfs://localhost:9000/user/hdfs/in/
18 |         path.put("matrixMult", HDFS+"Mult/");
19 |         path.put("matrixMultOut", HDFS+"/Mult/Out/");
20 |         //Step1.run(path);
21 |         // MyTest.run(path);
22 |         SparseMatrix.run(path);
23 |         //Step3.run1(path);
24 |         //Step3.run2(path);
25 | //        Step4.run(path);
26 |         
27 |         //Step4_Update.run(path);
28 |         //Step4_Update2.run(path);
29 |         
30 |         
31 | //        // hadoop fs -cat /user/hdfs/recommend/step4/part-00000
32 | //        JobConf conf = config();
33 | //        HdfsDAO hdfs = new HdfsDAO(HDFS, conf);
34 | //        hdfs.cat("/user/hdfs/recommend/step4/part-00000");
35 |         
36 |         System.exit(0);
37 |     }
38 | 
39 |     public static JobConf config() {
40 |         JobConf conf = new JobConf(Recommend.class);
41 |         conf.setJobName("Recommand");
42 |         //conf.addResource("classpath:/hadoop/core-site.xml");
43 |         //conf.addResource("classpath:/hadoop/hdfs-site.xml");
44 |         //conf.addResource("classpath:/hadoop/mapred-site.xml");
45 |         conf.set("io.sort.mb", "1024");
46 |         return conf;
47 |     }
48 | 
49 | }
50 | 


--------------------------------------------------------------------------------
/com.homework/src/week5/matrix/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package matrix;


--------------------------------------------------------------------------------
/com.homework/src/week5/recommend/MainPodium.java:
--------------------------------------------------------------------------------
 1 | package recommend;
 2 | 
 3 | import java.util.HashMap;
 4 | import java.util.Map;
 5 | import java.util.regex.Pattern;
 6 | 
 7 | 
 8 | 
 9 | import org.apache.hadoop.mapred.JobConf;
10 | 
11 | public class MainPodium {
12 |     public static final String HDFS = "hdfs://192.168.0.200:9000/user/hdfs/week5/";
13 |     public static final Pattern DELIMITER = Pattern.compile("[\t,]");
14 |     public static final String Step1In = HDFS+"step1In/";
15 |     public static final String Step1Out = HDFS+"step1Out/";
16 |     
17 |     public static final String Step2In = Step1Out;
18 |     public static final String Step2Out = HDFS+"step2Out/";
19 |     
20 |     public static final String Step3In = Step1Out;
21 |     public static final String Step3Out = HDFS+"step3Out/";
22 |     
23 |     public static final String Step4In1 = Step2Out;
24 |     public static final String Step4In2 = Step3Out;
25 |     public static final String Step4Out = HDFS+"step4Out/";
26 |     
27 |     
28 |     public static void main(String[] args) throws Exception {
29 |         
30 |     	Map<String, String> path = new HashMap<String, String>();
31 |     	path.put("data", "datafile/week5/small.csv");
32 |         path.put("Step1In", Step1In);
33 |         path.put("Step1Out", Step1Out);
34 |         
35 |         path.put("Step2In", Step2In);
36 |         path.put("Step2Out", Step2Out);
37 |         
38 |         path.put("Step3In", Step3In);
39 |         path.put("Step3Out", Step3Out);
40 |         
41 |         path.put("Step4In1", Step4In1);
42 |         path.put("Step4In2", Step4In2);
43 |         path.put("Step4Out", Step4Out);
44 |         Step1.run(path);
45 |         Step2.run(path);
46 |         Step3.run(path);
47 |         
48 |        Step4.run(path);
49 |         
50 |         //Step4_Update.run(path);
51 |         //Step4_Update2.run(path);
52 |         
53 |         
54 | //        // hadoop fs -cat /user/hdfs/recommend/step4/part-00000
55 | //        JobConf conf = config();
56 | //        HdfsDAO hdfs = new HdfsDAO(HDFS, conf);
57 | //        hdfs.cat("/user/hdfs/recommend/step4/part-00000");
58 |         
59 |         System.exit(0);
60 |     }
61 |     public static JobConf config() {
62 |         JobConf conf = new JobConf(MainPodium.class);
63 |         conf.setJobName("Recommand");
64 |         //conf.addResource("classpath:/hadoop/core-site.xml");
65 |         //conf.addResource("classpath:/hadoop/hdfs-site.xml");
66 |         //conf.addResource("classpath:/hadoop/mapred-site.xml");
67 |         conf.set("io.sort.mb", "1024");
68 |         return conf;
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/com.homework/src/week5/recommend/Step1.java:
--------------------------------------------------------------------------------
 1 | package recommend;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.Iterator;
 5 | import java.util.Map;
 6 | 
 7 | import com.homework.hdfs.HdfsDAO;
 8 | 
 9 | import org.apache.hadoop.fs.Path;
10 | import org.apache.hadoop.io.IntWritable;
11 | import org.apache.hadoop.io.Text;
12 | import org.apache.hadoop.mapred.FileInputFormat;
13 | import org.apache.hadoop.mapred.FileOutputFormat;
14 | import org.apache.hadoop.mapred.JobClient;
15 | import org.apache.hadoop.mapred.JobConf;
16 | import org.apache.hadoop.mapred.MapReduceBase;
17 | import org.apache.hadoop.mapred.Mapper;
18 | import org.apache.hadoop.mapred.OutputCollector;
19 | import org.apache.hadoop.mapred.Reducer;
20 | import org.apache.hadoop.mapred.Reporter;
21 | import org.apache.hadoop.mapred.RunningJob;
22 | import org.apache.hadoop.mapred.TextInputFormat;
23 | import org.apache.hadoop.mapred.TextOutputFormat;
24 | 
25 | /*得出以下结果
26 | 1       102:3.0,103:2.5,101:5.0
27 | 2       101:2.0,102:2.5,103:5.0,104:2.0
28 | 3       107:5.0,101:2.0,104:4.0,105:4.5
29 | 4       101:5.0,103:3.0,104:4.5,106:4.0
30 | 5       101:4.0,102:3.0,103:2.0,104:4.0,105:3.5,106:4.0*/
31 | public class Step1 {
32 | 
33 | 	public static class Step1Map extends MapReduceBase implements Mapper<Object,Text,Text,Text>{
34 | 
35 | 		@Override
36 | 		public void map(Object key, Text value,OutputCollector<Text, Text> output, Reporter reporter)throws IOException {
37 | 			String[] tokens=MainPodium.DELIMITER.split(value.toString());
38 | 			Text k=new Text();
39 | 			Text v=new Text();
40 | 			k.set(tokens[0]);
41 | 			v.set(tokens[1]+":"+tokens[2]);
42 | 			output.collect(k, v);	
43 | 		}
44 | 		
45 | 	}
46 | 	public static class Step1Reduce extends MapReduceBase implements Reducer<Text,Text,Text,Text>{
47 | 
48 | 		@Override
49 | 		public void reduce(Text key, Iterator<Text> values,OutputCollector<Text, Text> output, Reporter reporter)throws IOException {
50 | 			Text v=new Text();
51 | 			String str="";
52 | 			while(values.hasNext()){
53 | 				str=str+values.next()+",";
54 | 			}
55 | 			int n=str.lastIndexOf(",");
56 | 			
57 | 			v.set(str.substring(0,n));	
58 | 			output.collect(key, v);
59 | 			
60 | 		}
61 |      }
62 | 	
63 |     public static void run(Map<String, String> path) throws IOException {
64 |         JobConf conf = MainPodium.config();
65 | 
66 |         String input = path.get("Step1In");
67 |         String output = path.get("Step1Out");
68 | 
69 |         HdfsDAO hdfs = new HdfsDAO(MainPodium.HDFS, conf);
70 | //        hdfs.rmr(output);
71 |         hdfs.rmr(output);
72 |         hdfs.rmr(input);
73 |         hdfs.mkdirs(input);
74 |        hdfs.copyFile(path.get("data"), input);
75 | 
76 |         conf.setMapOutputKeyClass(Text.class);
77 |         conf.setMapOutputValueClass(Text.class);
78 | 
79 |         conf.setOutputKeyClass(Text.class);
80 |         conf.setOutputValueClass(Text.class);
81 | 
82 |         conf.setMapperClass(Step1Map.class);
83 |         conf.setCombinerClass(Step1Reduce.class);
84 |         conf.setReducerClass(Step1Reduce.class);
85 | 
86 |         conf.setInputFormat(TextInputFormat.class);
87 |         conf.setOutputFormat(TextOutputFormat.class);
88 | 
89 |         FileInputFormat.setInputPaths(conf, new Path(input));
90 |         FileOutputFormat.setOutputPath(conf, new Path(output));
91 | 
92 |         RunningJob job = JobClient.runJob(conf);
93 |         while (!job.isComplete()) {
94 |             job.waitForCompletion();
95 |         }
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/com.homework/src/week5/recommend/Step2.java:
--------------------------------------------------------------------------------
 1 | package recommend;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.Iterator;
 5 | import java.util.Map;
 6 | 
 7 | import  com.homework.hdfs.HdfsDAO;
 8 | 
 9 | import org.apache.hadoop.fs.Path;
10 | import org.apache.hadoop.io.IntWritable;
11 | import org.apache.hadoop.io.LongWritable;
12 | import org.apache.hadoop.io.NullWritable;
13 | import org.apache.hadoop.io.Text;
14 | import org.apache.hadoop.mapred.FileInputFormat;
15 | import org.apache.hadoop.mapred.FileOutputFormat;
16 | import org.apache.hadoop.mapred.JobClient;
17 | import org.apache.hadoop.mapred.JobConf;
18 | import org.apache.hadoop.mapred.MapReduceBase;
19 | import org.apache.hadoop.mapred.Mapper;
20 | import org.apache.hadoop.mapred.OutputCollector;
21 | import org.apache.hadoop.mapred.Reducer;
22 | import org.apache.hadoop.mapred.Reporter;
23 | import org.apache.hadoop.mapred.RunningJob;
24 | import org.apache.hadoop.mapred.TextInputFormat;
25 | import org.apache.hadoop.mapred.TextOutputFormat;
26 | 
27 | /*得出物品同现矩阵*/
28 | public class Step2 {
29 | 
30 | 	public static class Step2Mapper extends MapReduceBase implements Mapper<Object,Text,Text,IntWritable>{
31 | 
32 | 		private final static IntWritable v = new IntWritable(1);
33 | 		@Override
34 | 		public void map(Object key, Text value,OutputCollector<Text, IntWritable> output, Reporter reporter)
35 | 				throws IOException {
36 | 			String[] tokens=MainPodium.DELIMITER.split(value.toString());
37 | 			Text k=new Text();
38 | 			
39 | 			for(int i=1;i<tokens.length;i++){
40 | 				String item1=tokens[i].split(":")[0];
41 | 				for(int j=1;j<tokens.length;j++){
42 | 					String item2=tokens[j].split(":")[0];
43 | 					k.set(item1+","+item2);
44 | 					output.collect(k, v);
45 | 				}
46 | 			}
47 | 			
48 | 		}
49 | 		
50 | 	}
51 | 	public static class Step2Reduce extends MapReduceBase implements Reducer<Text,IntWritable,Text,IntWritable>{
52 | 		 
53 | 		@Override
54 | 		public void reduce(Text key, Iterator<IntWritable> values,OutputCollector<Text, IntWritable> output, Reporter reporter)
55 | 				throws IOException {
56 | 				Integer sum=0;
57 | 				while(values.hasNext()){
58 | 					sum=sum+values.next().get();
59 | 				}
60 | 				IntWritable result = new IntWritable();
61 | 				//result.set(key+","+sum.toString());
62 | 				result.set(sum);
63 | 				output.collect(key,result);
64 | 		}
65 | 		
66 | 	}
67 |     public static void run(Map<String, String> path) throws IOException {
68 |         JobConf conf = MainPodium.config();
69 | 
70 |         String input = path.get("Step2In"); 
71 |         String output = path.get("Step2Out");
72 | 
73 |         HdfsDAO hdfs = new HdfsDAO(MainPodium.HDFS, conf);
74 |         hdfs.rmr(output);
75 |         
76 |         //conf.setMapOutputKeyClass(Text.class);
77 |         //conf.setMapOutputValueClass(IntWritable.class);
78 |         
79 |         conf.setOutputKeyClass(Text.class);
80 |         conf.setOutputValueClass(IntWritable.class);
81 | 
82 |         conf.setMapperClass(Step2Mapper.class);
83 |         conf.setCombinerClass(Step2Reduce.class);
84 |         conf.setReducerClass(Step2Reduce.class);
85 | 
86 |         conf.setInputFormat(TextInputFormat.class);
87 |         conf.setOutputFormat(TextOutputFormat.class);
88 | 
89 |         FileInputFormat.setInputPaths(conf, new Path(input));
90 |         FileOutputFormat.setOutputPath(conf, new Path(output));
91 | 
92 |         RunningJob job = JobClient.runJob(conf);
93 |         while (!job.isComplete()) {
94 |             job.waitForCompletion();
95 |         }
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/com.homework/src/week5/recommend/Step3.java:
--------------------------------------------------------------------------------
 1 | package recommend;
 2 | 
 3 | import java.io.IOException;
 4 | import java.util.Iterator;
 5 | import java.util.Map;
 6 | 
 7 | import com.homework.hdfs.HdfsDAO;
 8 | 
 9 | import org.apache.hadoop.fs.Path;
10 | import org.apache.hadoop.io.IntWritable;
11 | import org.apache.hadoop.io.LongWritable;
12 | import org.apache.hadoop.io.NullWritable;
13 | import org.apache.hadoop.io.Text;
14 | import org.apache.hadoop.mapred.FileInputFormat;
15 | import org.apache.hadoop.mapred.FileOutputFormat;
16 | import org.apache.hadoop.mapred.JobClient;
17 | import org.apache.hadoop.mapred.JobConf;
18 | import org.apache.hadoop.mapred.MapReduceBase;
19 | import org.apache.hadoop.mapred.Mapper;
20 | import org.apache.hadoop.mapred.OutputCollector;
21 | import org.apache.hadoop.mapred.Reducer;
22 | import org.apache.hadoop.mapred.Reporter;
23 | import org.apache.hadoop.mapred.RunningJob;
24 | import org.apache.hadoop.mapred.TextInputFormat;
25 | import org.apache.hadoop.mapred.TextOutputFormat;
26 | 
27 | /*得出用户评分矩阵，用户名从左到右排*/
28 | public class Step3 {
29 | 	public static class Step3Mapper extends MapReduceBase implements Mapper<Object,Text,NullWritable,Text>{
30 | 
31 | 		private final static IntWritable v = new IntWritable(1);
32 | 		@Override
33 | 		public void map(Object key, Text value,OutputCollector<NullWritable, Text> output, Reporter reporter)
34 | 				throws IOException {
35 | 			String[] tokens=MainPodium.DELIMITER.split(value.toString());
36 | 			Text k=new Text();
37 | 			for(int i=1;i<tokens.length;i++){
38 | 				Text v=new Text();
39 | 				String str=tokens[i].split(":")[0]+",10"+tokens[0]+","+tokens[i].split(":")[1];
40 | 				v.set(str);
41 | 				output.collect(NullWritable.get(),v);
42 | 			}
43 | 			
44 | 			
45 | 		}
46 | 		
47 | 	}
48 | 	public static void run(Map<String, String> path) throws IOException {
49 |         JobConf conf = MainPodium.config();
50 | 
51 |         String input = path.get("Step3In"); 
52 |         String output = path.get("Step3Out");
53 | 
54 |         HdfsDAO hdfs = new HdfsDAO(MainPodium.HDFS, conf);
55 |         hdfs.rmr(output);
56 | 
57 |         conf.setOutputKeyClass(NullWritable.class);
58 |         conf.setOutputValueClass(Text.class);
59 | 
60 |         conf.setMapperClass(Step3Mapper.class);
61 |         //conf.setCombinerClass(Step2Reduce.class);
62 |         //conf.setReducerClass(Step2Reduce.class);
63 | 
64 |         conf.setInputFormat(TextInputFormat.class);
65 |         conf.setOutputFormat(TextOutputFormat.class);
66 | 
67 |         FileInputFormat.setInputPaths(conf, new Path(input));
68 |         FileOutputFormat.setOutputPath(conf, new Path(output));
69 | 
70 |         RunningJob job = JobClient.runJob(conf);
71 |         while (!job.isComplete()) {
72 |             job.waitForCompletion();
73 |         }
74 |     }
75 | 	
76 | }
77 | 


--------------------------------------------------------------------------------
/com.homework/src/week5/recommend/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package recommend;


--------------------------------------------------------------------------------
/com.homework/src/week6/filterSalary/Main.java:
--------------------------------------------------------------------------------
 1 | package filterSalary;
 2 | 
 3 | import java.util.HashMap;
 4 | import java.util.Map;
 5 | import java.util.regex.Pattern;
 6 | 
 7 | import org.apache.hadoop.mapred.JobConf;
 8 | //第一步，MapReduce后产生：用户，浏览职位，职位薪水
 9 | /* 1.map:
10 | * job.csv文件
11 | * key:jobid,value:job:job,salary
12 | * pv.csv文件
13 | * key:jobid,value:user,userid
14 | * 2.reduce:
15 | * key:userid,
16 | * value:jobid,salary
17 | * */
18 | 
19 | //第二步， MapReduce后产生:  用户，浏览过职位薪水相加*0.8
20 | 
21 | //第三步：过滤: 推荐结果 > 用户浏览过职位平均薪水%80 .
22 | public class Main {
23 |     public static final String HDFS = "hdfs://192.168.0.200:9000/user/hdfs/week6/";
24 |     public static final Pattern DELIMITER = Pattern.compile("[\t,]");
25 |     
26 |     public static final String Step0In = HDFS+"mahoutInput";
27 |     public static final String Step0Out = HDFS+"step0Out/";
28 |     
29 |     public static final String Step1In = HDFS+"step1In/";
30 |     public static final String Step1Out = HDFS+"step1Out/";
31 |     
32 |     public static final String Step2In = Step1Out;
33 |     public static final String Step2Out = HDFS+"step2Out/";
34 |     
35 |     public static final String Step3In1 = Step2Out;
36 |     public static final String Step3In2 = Step0Out;
37 |     public static final String Step3Out = HDFS+"step3Out/";
38 |     
39 |     public static final String Step4In1 = Step0Out;
40 |     public static final String Step4In2 = Step3Out;
41 |     public static final String Step4Out = HDFS+"step4Out/";
42 |     
43 |     public static void main(String[] args) throws Exception {
44 |         
45 |     	Map<String, String> path = new HashMap<String, String>();
46 |     	path.put("ToHdfsData1", "datafile/week6/job.csv");
47 |     	path.put("ToHdfsData2", "datafile/week6/pv.csv");
48 |     	
49 |         path.put("Step0In", Step0In);
50 |         path.put("Step0Out", Step0Out);
51 |         
52 |     	path.put("Step1In", Step1In);
53 |         path.put("Step1Out", Step1Out);
54 |         
55 |         path.put("Step2In", Step2In);
56 |         path.put("Step2Out", Step2Out);
57 |         
58 |         path.put("Step3In1", Step3In1);
59 |         path.put("Step3In2", Step3In2);
60 |         path.put("Step3Out", Step3Out);
61 |         
62 |         path.put("Step4In1", Step4In1);
63 |         path.put("Step4In2", Step4In2);
64 |         path.put("Step4Out", Step4Out);
65 |         //Step0.run(path);
66 |         //Step1.run(path);
67 |         //Step2.run(path);
68 |         Step3.run(path);
69 |         
70 |        //Step4.run(path);
71 |         
72 |         //Step4_Update.run(path);
73 |         //Step4_Update2.run(path);
74 |         System.exit(0);
75 |     }
76 | 	
77 |     public static JobConf config() {
78 |         JobConf conf = new JobConf(Main.class);
79 |         conf.setJobName("Main");
80 |         //conf.addResource("classpath:/hadoop/core-site.xml");
81 |         //conf.addResource("classpath:/hadoop/hdfs-site.xml");
82 |         //conf.addResource("classpath:/hadoop/mapred-site.xml");
83 |         conf.set("io.sort.mb", "1024");
84 |         return conf;
85 |     }
86 | }
87 | 


--------------------------------------------------------------------------------
/com.homework/src/week6/filterSalary/Step0.java:
--------------------------------------------------------------------------------
  1 | package filterSalary;
  2 | import java.io.IOException;
  3 | import java.util.HashMap;
  4 | import java.util.Iterator;
  5 | import java.util.Map;
  6 | import com.homework.hdfs.HdfsDAO;
  7 | import org.apache.hadoop.fs.Path;
  8 | import org.apache.hadoop.io.IntWritable;
  9 | import org.apache.hadoop.io.LongWritable;
 10 | import org.apache.hadoop.io.Text;
 11 | import org.apache.hadoop.mapred.JobClient;
 12 | import org.apache.hadoop.mapred.JobConf;
 13 | import org.apache.hadoop.mapred.RunningJob;
 14 | import org.apache.hadoop.mapreduce.Job;
 15 | import org.apache.hadoop.mapreduce.Mapper;
 16 | import org.apache.hadoop.mapreduce.Reducer;
 17 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 18 | import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 19 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 20 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 21 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 22 | public class Step0 {
 23 | 	public static class Step0Mapper extends Mapper<LongWritable, Text, Text, Text> {
 24 | 
 25 |         private String flag;//  
 26 | 
 27 |         @Override
 28 |         protected void setup(Context context) throws IOException, InterruptedException {
 29 |             FileSplit split = (FileSplit) context.getInputSplit();
 30 |             flag = split.getPath().getName();// 判断读的数据集
 31 | 
 32 |             // System.out.println(flag);
 33 |         }
 34 |         
 35 |         @Override
 36 |         public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
 37 |             
 38 |             String str=values.toString();
 39 |             String[] line=Main.DELIMITER.split(str);
 40 |             if(line.length==0)return;
 41 |             if(flag.equals("mahoutA.txt")){
 42 |             	String userid=line[0];
 43 |             	for(int i=1;i<line.length;i++){
 44 |                     String jobid=line[i];
 45 |                     Text k=new Text();
 46 |                     Text v=new Text();
 47 |                     k.set(jobid);
 48 |                     v.set("user,"+userid);
 49 |                     context.write(k, v);
 50 |             	}
 51 |  
 52 |             }
 53 |             if(flag.equals("job.csv")){
 54 |                 if(line.length!=3)return;
 55 |             	String jobid=line[0];
 56 |                 String salary=line[2];
 57 |                 Text k=new Text();
 58 |                 Text v=new Text();
 59 |                 k.set(jobid);
 60 |                 v.set("job,"+salary);
 61 |                 context.write(k, v);
 62 |             }
 63 | 
 64 | 
 65 |         }
 66 | 	}
 67 |     public static class Step0Reducer extends Reducer<Text, Text, Text, Text> {
 68 |     	@Override
 69 |         public void reduce(Text key,Iterable<Text> values,  Context context) throws IOException, InterruptedException {
 70 |     		Map<String,String> map=new HashMap<String,String>();
 71 |     		Integer i=0;
 72 |     		for(Text value:values){
 73 |     			String[] arr=Main.DELIMITER.split(value.toString());
 74 |     			i=i+1;
 75 |     			if(arr[0].equals("job"))
 76 |     			map.put(arr[0], arr[1]);
 77 |     			else
 78 |     			{
 79 |     				map.put(i.toString(), arr[1]);
 80 |     			}
 81 |     		}
 82 |     		String salary=map.get("job");
 83 |     	    for(Map.Entry<String,String> entry:map.entrySet()){
 84 |     			if(entry.getKey().equals("job"))continue;
 85 |     			Text k=new Text();
 86 |     			Text v=new Text();
 87 |     			k.set(entry.getValue());
 88 |     			v.set(key.toString()+","+salary);
 89 |     			context.write(k, v);
 90 |     		}
 91 |     	}
 92 |     }
 93 |     public static void run(Map<String, String> path) throws IOException, ClassNotFoundException, InterruptedException {
 94 |         JobConf conf = Main.config();
 95 | 
 96 |         String input = path.get("Step0In");
 97 |         String output = path.get("Step0Out");
 98 | 
 99 |         HdfsDAO hdfs = new HdfsDAO(Main.HDFS, conf);
100 |         hdfs.rmr(output);
101 |         //hdfs.copyFile(path.get("ToHdfsData1"), input);
102 |         Job job = new Job(conf);
103 |         job.setJarByClass(Step0.class);
104 | 
105 |         job.setOutputKeyClass(Text.class);
106 |         job.setOutputValueClass(Text.class);
107 | 
108 |         job.setMapperClass(Step0Mapper.class);
109 |         job.setReducerClass(Step0Reducer.class);
110 | 
111 |         job.setInputFormatClass(TextInputFormat.class);
112 |         job.setOutputFormatClass(TextOutputFormat.class);
113 | 
114 |         FileInputFormat.setInputPaths(job, new Path(input));
115 |         FileOutputFormat.setOutputPath(job, new Path(output));
116 | 
117 |         job.waitForCompletion(true);
118 |     }
119 | }
120 | 


--------------------------------------------------------------------------------
/com.homework/src/week6/filterSalary/Step1.java:
--------------------------------------------------------------------------------
  1 | package filterSalary;
  2 | import java.io.IOException;
  3 | import java.util.HashMap;
  4 | import java.util.Iterator;
  5 | import java.util.Map;
  6 | import com.homework.hdfs.HdfsDAO;
  7 | import org.apache.hadoop.fs.Path;
  8 | import org.apache.hadoop.io.IntWritable;
  9 | import org.apache.hadoop.io.LongWritable;
 10 | import org.apache.hadoop.io.Text;
 11 | import org.apache.hadoop.mapred.JobClient;
 12 | import org.apache.hadoop.mapred.JobConf;
 13 | import org.apache.hadoop.mapred.RunningJob;
 14 | import org.apache.hadoop.mapreduce.Job;
 15 | import org.apache.hadoop.mapreduce.Mapper;
 16 | import org.apache.hadoop.mapreduce.Reducer;
 17 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 18 | import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 19 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 20 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 21 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 22 | //第一步，MapReduce后产生：用户，浏览职位，职位薪水
 23 | /* 1.map:
 24 |  * job.csv文件
 25 |  * key:jobid,value:job:job,salary
 26 |  * pv.csv文件
 27 |  * key:jobid,value:user,userid
 28 |  * 2.reduce:
 29 |  * key:userid,
 30 |  * value:jobid,salary
 31 |  * */
 32 | 
 33 | //第二步， MapReduce后产生:  用户，浏览过职位薪水相加*0.8
 34 | 
 35 | //第三步：过滤: 推荐结果 > 用户浏览过职位平均薪水%80 .
 36 | public class Step1 {
 37 | 
 38 | 	public static class Step1Mapper extends Mapper<LongWritable, Text, Text, Text> {
 39 | 
 40 |         private String flag;//  
 41 | 
 42 |         @Override
 43 |         protected void setup(Context context) throws IOException, InterruptedException {
 44 |             FileSplit split = (FileSplit) context.getInputSplit();
 45 |             flag = split.getPath().getName();// 判断读的数据集
 46 | 
 47 |             // System.out.println(flag);
 48 |         }
 49 |         
 50 |         @Override
 51 |         public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
 52 |             
 53 |             String str=values.toString();
 54 |             String[] line=Main.DELIMITER.split(str);
 55 |             if(line.length==0)return;
 56 |             if(flag.equals("pv.csv")){
 57 |             	String userid=line[0];
 58 |                 String jobid=line[1];
 59 |                 Text k=new Text();
 60 |                 Text v=new Text();
 61 |                 k.set(jobid);
 62 |                 v.set("user,"+userid);
 63 |                 context.write(k, v);
 64 |             }
 65 |             if(flag.equals("job.csv")){
 66 |                 if(line.length!=3)return;
 67 |             	String jobid=line[0];
 68 |                 String salary=line[2];
 69 |                 Text k=new Text();
 70 |                 Text v=new Text();
 71 |                 k.set(jobid);
 72 |                 v.set("job,"+salary);
 73 |                 context.write(k, v);
 74 |             }
 75 | 
 76 | 
 77 |         }
 78 | 	}
 79 |     public static class Step1Reducer extends Reducer<Text, Text, Text, Text> {
 80 |     	@Override
 81 |         public void reduce(Text key,Iterable<Text> values,  Context context) throws IOException, InterruptedException {
 82 |     		Map<String,String> map=new HashMap<String,String>();
 83 |     		Integer i=0;
 84 |     		for(Text value:values){
 85 |     			String[] arr=Main.DELIMITER.split(value.toString());
 86 |     			i=i+1;
 87 |     			if(arr[0].equals("job"))
 88 |     			map.put(arr[0], arr[1]);
 89 |     			else
 90 |     			{
 91 |     				map.put(i.toString(), arr[1]);
 92 |     			}
 93 |     		}
 94 |     		String salary=map.get("job");
 95 |     	
 96 |     		for(Map.Entry<String,String> entry:map.entrySet()){
 97 |     			if(entry.getKey().equals("job"))continue;
 98 |     			Text k=new Text();
 99 |     			Text v=new Text();
100 |     			k.set(entry.getValue());
101 |     			v.set(key.toString()+","+salary);
102 |     			context.write(k, v);
103 |     		}
104 |     		
105 |     	}
106 |     }
107 |     public static void run(Map<String, String> path) throws IOException, ClassNotFoundException, InterruptedException {
108 |         JobConf conf = Main.config();
109 | 
110 |         String input = path.get("Step1In");
111 |         String output = path.get("Step1Out");
112 | 
113 |         HdfsDAO hdfs = new HdfsDAO(Main.HDFS, conf);
114 | //        hdfs.rmr(output);
115 |         hdfs.rmr(output);
116 |         hdfs.rmr(input);
117 |         hdfs.mkdirs(input);
118 |         hdfs.copyFile(path.get("ToHdfsData1"), input);
119 |         hdfs.copyFile(path.get("ToHdfsData2"), input);
120 |         
121 |         Job job = new Job(conf);
122 |         job.setJarByClass(Step1.class);
123 | 
124 |         job.setOutputKeyClass(Text.class);
125 |         job.setOutputValueClass(Text.class);
126 | 
127 |         job.setMapperClass(Step1Mapper.class);
128 |         job.setReducerClass(Step1Reducer.class);
129 | 
130 |         job.setInputFormatClass(TextInputFormat.class);
131 |         job.setOutputFormatClass(TextOutputFormat.class);
132 | 
133 |         FileInputFormat.setInputPaths(job, new Path(input));
134 |         FileOutputFormat.setOutputPath(job, new Path(output));
135 | 
136 |         job.waitForCompletion(true);
137 |     }
138 | }
139 | 


--------------------------------------------------------------------------------
/com.homework/src/week6/filterSalary/Step2.java:
--------------------------------------------------------------------------------
 1 | package filterSalary;
 2 | import java.io.IOException;
 3 | import java.util.HashMap;
 4 | import java.util.Iterator;
 5 | import java.util.Map;
 6 | 
 7 | import com.homework.hdfs.HdfsDAO;
 8 | 
 9 | import org.apache.hadoop.fs.Path;
10 | import org.apache.hadoop.io.IntWritable;
11 | import org.apache.hadoop.io.LongWritable;
12 | import org.apache.hadoop.io.Text;
13 | import org.apache.hadoop.mapred.JobClient;
14 | import org.apache.hadoop.mapred.JobConf;
15 | import org.apache.hadoop.mapred.RunningJob;
16 | import org.apache.hadoop.mapreduce.Job;
17 | import org.apache.hadoop.mapreduce.Mapper;
18 | import org.apache.hadoop.mapreduce.Reducer;
19 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
20 | import org.apache.hadoop.mapreduce.lib.input.FileSplit;
21 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
22 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
23 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
24 | //第二步， MapReduce后产生:  用户，浏览过职位薪水相加*0.8
25 | /*1.map
26 | 	key:userid
27 | 	value:salary
28 | 2.reduce
29 |    key:userid
30 |    value:平均薪水*0.8*/
31 | public class Step2 {
32 | 
33 | 	public static class Step2Mapper extends Mapper<LongWritable, Text, Text, Text> {
34 | 
35 |        @Override
36 |         public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
37 |             
38 |             String str=values.toString();
39 |             String[] line=Main.DELIMITER.split(str);
40 |             if(line.length==0)return;
41 |            String userid=line[0];
42 |            String salary=line[2];
43 |            Text k=new Text();
44 |            Text v=new Text();
45 |            k.set(userid);
46 |            v.set(salary);
47 |            context.write(k, v);
48 |         }
49 | 	}
50 |     public static class Step2Reducer extends Reducer<Text, Text, Text, Text> {
51 |     	@Override
52 |         public void reduce(Text key,Iterable<Text> values,  Context context) throws IOException, InterruptedException {
53 |     		Integer i=0;
54 |     		Double sum=0.0;
55 |     		for(Text value:values){
56 |     			i=i+1;
57 |     			Double val=Double.valueOf(value.toString());
58 |     			sum=sum+val;
59 |     		}
60 |     		Double  average=sum/i;
61 |     		Double va=average*0.8;
62 | 			
63 | 			Text v=new Text();
64 | 			v.set(va.toString());
65 | 			context.write(key, v);
66 |     	}
67 |     }
68 |     public static void run(Map<String, String> path) throws IOException, ClassNotFoundException, InterruptedException {
69 |         JobConf conf = Main.config();
70 | 
71 |         String input = path.get("Step2In");
72 |         String output = path.get("Step2Out");
73 | 
74 |         HdfsDAO hdfs = new HdfsDAO(Main.HDFS, conf);
75 |         hdfs.rmr(output);
76 |         Job job = new Job(conf);
77 |         job.setJarByClass(Step2.class);
78 | 
79 |         job.setOutputKeyClass(Text.class);
80 |         job.setOutputValueClass(Text.class);
81 | 
82 |         job.setMapperClass(Step2Mapper.class);
83 |         job.setReducerClass(Step2Reducer.class);
84 |         
85 |         job.setInputFormatClass(TextInputFormat.class);
86 |         job.setOutputFormatClass(TextOutputFormat.class);
87 | 
88 |         FileInputFormat.setInputPaths(job, new Path(input));
89 |         FileOutputFormat.setOutputPath(job, new Path(output));
90 | 
91 |         job.waitForCompletion(true);
92 |     }
93 | }
94 | 


--------------------------------------------------------------------------------
/com.homework/src/week6/filterSalary/Step3.java:
--------------------------------------------------------------------------------
  1 | package filterSalary;
  2 | import java.io.IOException;
  3 | import java.util.HashMap;
  4 | import java.util.Iterator;
  5 | import java.util.Map;
  6 | import com.homework.hdfs.HdfsDAO;
  7 | import org.apache.hadoop.fs.Path;
  8 | import org.apache.hadoop.io.IntWritable;
  9 | import org.apache.hadoop.io.LongWritable;
 10 | import org.apache.hadoop.io.Text;
 11 | import org.apache.hadoop.mapred.JobClient;
 12 | import org.apache.hadoop.mapred.JobConf;
 13 | import org.apache.hadoop.mapred.RunningJob;
 14 | import org.apache.hadoop.mapreduce.Job;
 15 | import org.apache.hadoop.mapreduce.Mapper;
 16 | import org.apache.hadoop.mapreduce.Reducer;
 17 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 18 | import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 19 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 20 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 21 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 22 | public class Step3 {
 23 | 	public static class Step3Mapper extends Mapper<LongWritable, Text, Text, Text> {
 24 | 
 25 |         private String flag;//  
 26 | 
 27 |         @Override
 28 |         protected void setup(Context context) throws IOException, InterruptedException {
 29 |             FileSplit split = (FileSplit) context.getInputSplit();
 30 |             flag = split.getPath().getParent().getName();// 判断读的数据集
 31 | 
 32 |             // System.out.println(flag);
 33 |         }
 34 |         
 35 |         @Override
 36 |         public void map(LongWritable key, Text values, Context context) throws IOException, InterruptedException {
 37 |             
 38 |             String str=values.toString();
 39 |             String[] line=Main.DELIMITER.split(str);
 40 |             if(line.length==0)return;
 41 |             if(flag.equals("step0Out")){
 42 |             	String userid=line[0];
 43 |                 String jobid=line[1];
 44 |                 String salary=line[2];
 45 |                 Text k=new Text();
 46 |                 Text v=new Text();
 47 |                 k.set(userid);
 48 |                 v.set(jobid+","+salary);
 49 |                 context.write(k, v);
 50 |             }
 51 |             if(flag.equals("step2Out")){
 52 |                 if(line.length!=2)return;
 53 |             	String userid=line[0];
 54 |                 String salary=line[1];
 55 |                 Text k=new Text();
 56 |                 Text v=new Text();
 57 |                 k.set(userid);
 58 |                 v.set("average,"+salary);
 59 |                 context.write(k, v);
 60 |             }
 61 | 
 62 | 
 63 |         }
 64 | 	}
 65 |     public static class Step3Reducer extends Reducer<Text, Text, Text, Text> {
 66 |     	@Override
 67 |         public void reduce(Text key,Iterable<Text> values,  Context context) throws IOException, InterruptedException {
 68 |     		Map<String,String> map=new HashMap<String,String>();
 69 |     		Integer i=0;
 70 |     		for(Text value:values){
 71 |     			String[] arr=Main.DELIMITER.split(value.toString());
 72 |     			i=i+1;
 73 |     			if(arr[0].equals("average"))
 74 |     			map.put(arr[0], arr[1]);
 75 |     			else
 76 |     			{
 77 |     				map.put(arr[0], arr[1]);
 78 |     			}
 79 |     		}
 80 |     		String salary=map.get("average");
 81 |     	    Double average=Double.valueOf(salary);
 82 |     	    StringBuilder sb=new StringBuilder();
 83 |     		for(Map.Entry<String,String> entry:map.entrySet()){
 84 |     			if(entry.getKey().equals("average"))continue;
 85 | 
 86 |     			Double val=Double.valueOf(entry.getValue());
 87 |     			if(val>=average){
 88 |     				sb.append("(推荐职位ID："+entry.getKey()+",薪水："+entry.getValue()+"),");
 89 |     			}
 90 | 
 91 |     		}
 92 |     		String result = String.format("%.2f", average);
 93 |     		if(sb.length()>1){
 94 |     			sb.append("(%80平均薪水:"+result+")");
 95 |     		}
 96 | 			Text k=new Text();
 97 | 			Text v=new Text();
 98 |     		k.set("用户:"+key);
 99 | 			v.set(sb.toString());
100 | 			context.write(k, v);
101 |     		
102 |     	}
103 |     }
104 |     public static void run(Map<String, String> path) throws IOException, ClassNotFoundException, InterruptedException {
105 |         JobConf conf = Main.config();
106 | 
107 |         String input1 = path.get("Step3In1");
108 |         String input2 = path.get("Step3In2");
109 |         String output = path.get("Step3Out");
110 | 
111 |         HdfsDAO hdfs = new HdfsDAO(Main.HDFS, conf);
112 | //        hdfs.rmr(output);
113 |         hdfs.rmr(output);
114 | 
115 |         
116 |         Job job = new Job(conf);
117 |         job.setJarByClass(Step3.class);
118 | 
119 |         job.setOutputKeyClass(Text.class);
120 |         job.setOutputValueClass(Text.class);
121 | 
122 |         job.setMapperClass(Step3Mapper.class);
123 |         job.setReducerClass(Step3Reducer.class);
124 | 
125 |         job.setInputFormatClass(TextInputFormat.class);
126 |         job.setOutputFormatClass(TextOutputFormat.class);
127 | 
128 |         FileInputFormat.setInputPaths(job, new Path(input1),new Path(input2));
129 |         FileOutputFormat.setOutputPath(job, new Path(output));
130 | 
131 |         job.waitForCompletion(true);
132 |     }
133 | }
134 | 


--------------------------------------------------------------------------------
/com.homework/src/week6/filterSalary/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package filterSalary;


--------------------------------------------------------------------------------
/com.homework/src/week6/recommendJob/ItemLoglikelihood.java:
--------------------------------------------------------------------------------
 1 | package recommendJob;
 2 | 
 3 | import java.io.BufferedReader;
 4 | import java.io.File;
 5 | import java.io.FileReader;
 6 | import java.io.IOException;
 7 | import java.text.ParseException;
 8 | import java.text.SimpleDateFormat;
 9 | import java.util.Date;
10 | import java.util.HashSet;
11 | import java.util.List;
12 | import java.util.Set;
13 | 
14 | import org.apache.mahout.cf.taste.common.TasteException;
15 | import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
16 | import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
17 | import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
18 | import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
19 | import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
20 | import org.apache.mahout.cf.taste.model.DataModel;
21 | import org.apache.mahout.cf.taste.recommender.IDRescorer;
22 | import org.apache.mahout.cf.taste.recommender.RecommendedItem;
23 | import org.apache.mahout.cf.taste.recommender.Recommender;
24 | import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
25 | import org.apache.mahout.cf.taste.similarity.UserSimilarity;
26 | 
27 | public class ItemLoglikelihood {
28 | 
29 | 	final static int neighborhoodNum=2;
30 | 	final static int recommendNum=3;
31 | 	public static void main(String[] args) throws TasteException, IOException {
32 | 		String file="datafile/week6/pv.csv";
33 | 		DataModel dataModel=new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(new FileDataModel(new File(file))));
34 | 		ItemSimilarity itemSimilarity=new LogLikelihoodSimilarity(dataModel);
35 | 		Recommender recommender=new GenericBooleanPrefItemBasedRecommender(dataModel,itemSimilarity);
36 | 		
37 |         LongPrimitiveIterator iterator=dataModel.getUserIDs();
38 |         while(iterator.hasNext()){
39 |         	long uid=iterator.nextLong();
40 |             Set<Long> jobids = getOutdateJobID("datafile/week6/job.csv");
41 |             IDRescorer rescorer = new JobRescorer(jobids);
42 |         	List<RecommendedItem> list=recommender.recommend(uid, recommendNum,rescorer);
43 |         	//System.out.printf("uid:%s", uid);
44 |         	System.out.printf("%s",uid);
45 |         	for(RecommendedItem ritem:list){
46 |         		//System.out.printf("(%s,%f)", ritem.getItemID(), ritem.getValue());
47 |         		System.out.printf("%s",","+ ritem.getItemID());
48 |         		
49 |         	}
50 |         	System.out.println();
51 |         }
52 | 	}
53 | 	
54 |     public static Set<Long> getOutdateJobID(String file) throws IOException {
55 |         BufferedReader br = new BufferedReader(new FileReader(new File(file)));
56 |         Set<Long> jobids = new HashSet<Long>();
57 |         String s = null;
58 |         while ((s = br.readLine()) != null) {
59 |             String[] cols = s.split(",");
60 |             SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd");
61 |             Date date = null;
62 |             try {
63 |                 date = df.parse(cols[1]);
64 |                 if (date.getTime() < df.parse("2013-01-01").getTime()) {
65 |                     jobids.add(Long.parseLong(cols[0]));
66 |                 }
67 |             } catch (ParseException e) {
68 |                 e.printStackTrace();
69 |             }
70 | 
71 |         }
72 |         br.close();
73 |         return jobids;
74 |     }
75 | 
76 | }
77 | class JobRescorer implements IDRescorer {
78 |     final private Set<Long> jobids;
79 | 
80 |     public JobRescorer(Set<Long> jobs) {
81 |         this.jobids = jobs;
82 |     }
83 | 
84 |     @Override
85 |     public double rescore(long id, double originalScore) {
86 |         return isFiltered(id) ? Double.NaN : originalScore;
87 |     }
88 | 
89 |     @Override
90 |     public boolean isFiltered(long id) {
91 |         return jobids.contains(id);
92 |     }
93 | }
94 | 


--------------------------------------------------------------------------------
/com.homework/src/week6/recommendJob/UserCityBlock.java:
--------------------------------------------------------------------------------
  1 | package recommendJob;
  2 | 
  3 | 
  4 | import java.io.BufferedReader;  
  5 | import java.io.File;  
  6 | import java.io.FileReader;  
  7 | import java.io.IOException;  
  8 | import java.util.HashMap;  
  9 | import java.util.HashSet;  
 10 | import java.util.List;  
 11 | import java.util.Map;  
 12 | import java.util.Set;  
 13 | 
 14 | import org.apache.mahout.cf.taste.common.TasteException;  
 15 | import org.apache.mahout.cf.taste.impl.common.FastIDSet;  
 16 | import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;  
 17 | import org.apache.mahout.cf.taste.model.DataModel;  
 18 | import org.apache.mahout.cf.taste.recommender.IDRescorer;
 19 | import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 20 | import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
 21 | import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
 22 | import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
 23 | import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefUserBasedRecommender;
 24 | import org.apache.mahout.cf.taste.impl.similarity.CityBlockSimilarity;
 25 | import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
 26 | import org.apache.mahout.cf.taste.recommender.Recommender;
 27 | import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 28 | 
 29 | public class UserCityBlock {
 30 | 
 31 | 	final static int neighborhoodNum=2;
 32 | 	final static int recommendNum=3;
 33 | 	
 34 | 	public static void main(String[] args) throws TasteException, IOException {
 35 | 		String file="datafile/week6/pv.csv";
 36 | 		DataModel dataModel=new GenericBooleanPrefDataModel(GenericBooleanPrefDataModel.toDataMap(new FileDataModel(new File(file))));
 37 | 		UserSimilarity userSimilarity=new CityBlockSimilarity(dataModel);
 38 | 		UserNeighborhood userNeighborhood=new NearestNUserNeighborhood(neighborhoodNum,userSimilarity,dataModel);
 39 | 		Recommender recommender=new GenericBooleanPrefUserBasedRecommender(dataModel, userNeighborhood, userSimilarity);
 40 |         LongPrimitiveIterator iterator=dataModel.getUserIDs();
 41 |         Map<Long, Double> averSalary = getAverSalary("datafile/week6/job.csv", dataModel);  
 42 |         while(iterator.hasNext()){
 43 |         	long uid=iterator.nextLong();
 44 |         	
 45 |         	Set<Long> jobids = getSalaryJobID(uid, "datafile/week6/job.csv", averSalary);  
 46 |             IDRescorer rescorer = new JobRescorer(jobids); 
 47 |         	
 48 |         	List<RecommendedItem> list=recommender.recommend(uid, recommendNum,rescorer);
 49 |         	 System.out.printf("uid:%s", uid);
 50 |         	for(RecommendedItem ritem:list){
 51 |         		System.out.printf("(%s,%f)", ritem.getItemID(), ritem.getValue());
 52 |         	}
 53 |         	System.out.println();
 54 |         }
 55 |     }
 56 | 	
 57 | 	public static Set<Long> getSalaryJobID(long uid, String file, Map<Long, Double> averSalary) throws IOException {  
 58 |         BufferedReader br = new BufferedReader(new FileReader(new File(file)));  
 59 |         Set<Long> jobids = new HashSet<Long>();  
 60 |         String s = null;  
 61 |         while ((s = br.readLine()) != null) {  
 62 |                 String[] cols = s.split(",");  
 63 |                 double salary = Double.valueOf(cols[2]);  
 64 |                 if (salary < averSalary.get(uid)) {  
 65 |                         jobids.add(Long.parseLong(cols[0]));  
 66 |                 }  
 67 |         }  
 68 |         br.close();  
 69 |         return jobids;  
 70 |     }  
 71 |       
 72 |     // 获取每个用户的基准比较工资：aver(浏览过的工资)*0.8  
 73 |     public static Map<Long, Double> getAverSalary(String file, DataModel dataModel)   
 74 |                             throws NumberFormatException, IOException, TasteException{  
 75 |             Map<Long, Double> salaries = new HashMap<Long, Double>();  
 76 |             BufferedReader br = new BufferedReader(new FileReader(new File(file)));  
 77 |         String s = null;  
 78 |         while ((s = br.readLine()) != null) {  
 79 |                 String[] cols = s.split(",");  
 80 |                 salaries.put(Long.parseLong(cols[0]), Double.valueOf(cols[2]));  
 81 |         }  
 82 |         br.close();  
 83 |   
 84 |   
 85 |             Map<Long, Double> averSalaries = new HashMap<Long, Double>();  
 86 |             LongPrimitiveIterator iter = dataModel.getUserIDs();  
 87 |             while (iter.hasNext()) {  
 88 |             long uid = iter.nextLong();  
 89 |             FastIDSet items = dataModel.getItemIDsFromUser(uid);  
 90 |             LongPrimitiveIterator itemsIter = items.iterator();  
 91 |             double sum = 0;  
 92 |             int count = 0;  
 93 |             double aver = 0.0;  
 94 |             while (itemsIter.hasNext()) {  
 95 |                 long item = itemsIter.nextLong();  
 96 |                 double salary = salaries.get(item);  
 97 |                 sum += salary;  
 98 |                 count ++;  
 99 |             }  
100 |             if(count > 0) aver = 0.8*sum/count;  
101 |             averSalaries.put(uid, aver);  
102 |             }  
103 |             return averSalaries;  
104 |     }  
105 |   
106 | 
107 | }
108 | 


--------------------------------------------------------------------------------
/com.homework/src/week6/recommendJob/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package recommendJob;


--------------------------------------------------------------------------------
/com.homework/src/week6/test/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package test;


--------------------------------------------------------------------------------
/com.homework/src/week7/classfier/Main.java:
--------------------------------------------------------------------------------
 1 | package classfier;
 2 | 
 3 | import java.util.HashMap;
 4 | import java.util.Map;
 5 | import java.util.regex.Pattern;
 6 | 
 7 | import org.apache.hadoop.mapred.JobConf;
 8 | //第一步，MapReduce后产生：用户，浏览职位，职位薪水
 9 | /* 1.map:
10 | * job.csv文件
11 | * key:jobid,value:job:job,salary
12 | * pv.csv文件
13 | * key:jobid,value:user,userid
14 | * 2.reduce:
15 | * key:userid,
16 | * value:jobid,salary
17 | * */
18 | 
19 | //第二步， MapReduce后产生:  用户，浏览过职位薪水相加*0.8
20 | 
21 | //第三步：过滤: 推荐结果 > 用户浏览过职位平均薪水%80 .
22 | public class Main {
23 |     public static final String HDFS = "hdfs://10.3.7.201:9000/user/hdfs/week7/";
24 |     public static final Pattern DELIMITER = Pattern.compile("[\t,]");
25 |     
26 |     public static final String PaodingFirstIn = HDFS+"in/";
27 |     public static final String PaodingFirstOut = HDFS+"out/";
28 |     
29 |    
30 |     
31 |     public static void main(String[] args) throws Exception {
32 |         
33 |     	Map<String, String> path = new HashMap<String, String>();
34 |     	
35 |     	
36 |         path.put("PaodingFirstIn", PaodingFirstIn);
37 |         path.put("PaodingFirstOut", PaodingFirstOut);
38 |         
39 |     	
40 |         PaodingFirst.run(path);
41 |         //Step3.run(path);
42 |        
43 |         System.exit(0);
44 |     }
45 | 	
46 |     public static JobConf config() {
47 |         JobConf conf = new JobConf(Main.class);
48 |         
49 |         conf.setJobName("Main");
50 |         //conf.addResource("classpath:/hadoop/core-site.xml");
51 |         //conf.addResource("classpath:/hadoop/hdfs-site.xml");
52 |         //conf.addResource("classpath:/hadoop/mapred-site.xml");
53 |         conf.set("io.sort.mb", "1024");
54 |         return conf;
55 |     }
56 | }
57 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/classfier/PaodingFirst.java:
--------------------------------------------------------------------------------
  1 | package classfier;
  2 | import java.io.IOException;
  3 | import java.io.StringReader;
  4 | import java.util.HashMap;
  5 | import java.util.Iterator;
  6 | import java.util.Map;
  7 | 
  8 | import com.homework.hdfs.HdfsDAO;
  9 | 
 10 | 
 11 | 
 12 | import myInputFormat.JamesInputFormat;
 13 | import net.paoding.analysis.analyzer.PaodingAnalyzer;
 14 | 
 15 | import org.apache.hadoop.fs.FileStatus;
 16 | import org.apache.hadoop.fs.FileSystem;
 17 | import org.apache.hadoop.fs.Path;
 18 | import org.apache.hadoop.io.IntWritable;
 19 | import org.apache.hadoop.io.LongWritable;
 20 | import org.apache.hadoop.io.Text;
 21 | import org.apache.hadoop.mapred.JobClient;
 22 | import org.apache.hadoop.mapred.JobConf;
 23 | import org.apache.hadoop.mapred.RunningJob;
 24 | import org.apache.hadoop.mapreduce.Job;
 25 | import org.apache.hadoop.mapreduce.Mapper;
 26 | import org.apache.hadoop.mapreduce.Reducer;
 27 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 28 | import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 29 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 30 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 31 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 32 | import org.apache.lucene.analysis.TokenStream;
 33 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 34 | public class PaodingFirst {
 35 | 	public static class PaodingFirstMapper extends Mapper<LongWritable, Text, Text, Text> {
 36 | 
 37 |         private String flag;//  
 38 |         PaodingAnalyzer analyzer = new PaodingAnalyzer();
 39 |         Text v=new Text();
 40 |         Text k=new Text();
 41 |         @Override
 42 |         protected void setup(Context context) throws IOException, InterruptedException {
 43 |             FileSplit split = (FileSplit) context.getInputSplit();
 44 |             flag = split.getPath().getParent().getName();// 判断读的数据集
 45 | 
 46 |             // System.out.println(flag);
 47 |         }
 48 |         
 49 |         @Override
 50 |         public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
 51 |             
 52 |             k.set(flag);
 53 |             PaodingAnalyzer analyzer=new PaodingAnalyzer();
 54 |             StringReader sr=new StringReader(value.toString());
 55 |             TokenStream ts=analyzer.tokenStream("", sr);
 56 |             StringBuilder sb=new StringBuilder();
 57 |             try{
 58 |             	while(ts.incrementToken()){
 59 |             		CharTermAttribute ta=ts.getAttribute(CharTermAttribute.class);
 60 |             		sb.append(ta.toString());
 61 |             		sb.append(" ");
 62 |             		System.out.print(ta.toString()+" ");
 63 |             	}
 64 |             }catch(Exception e){
 65 |             	
 66 |             }
 67 |             System.out.println();
 68 |             v.set(sb.toString());
 69 |             context.write(k, v);
 70 |         }
 71 | 	}
 72 |     
 73 |     	
 74 |     public static void run(Map<String, String> path) throws IOException, ClassNotFoundException, InterruptedException {
 75 |         JobConf conf = Main.config();
 76 |         conf.set("dfs.permissions","false");
 77 |         conf.setInt("mapred.min.split.size", 1);
 78 |         //conf.set("mapred.job.tracker", "[192.168.0.200]:9001");
 79 |         conf.setLong("mapreduce.input.fileinputformat.split.maxsize", 4000000);    //max size of Split
 80 |         String input = path.get("PaodingFirstIn");
 81 |         String output = path.get("PaodingFirstOut");
 82 | 
 83 |         HdfsDAO hdfs = new HdfsDAO(Main.HDFS, conf);
 84 |         hdfs.rmr(output);
 85 |         //hdfs.copyFile(path.get("ToHdfsData1"), input);
 86 |         Job job = new Job(conf);
 87 |         job.setJarByClass(PaodingFirst.class);
 88 | 
 89 |         job.setOutputKeyClass(Text.class);
 90 |         job.setOutputValueClass(Text.class);
 91 | 
 92 |         job.setMapperClass(PaodingFirstMapper.class);
 93 |        
 94 | 
 95 |         job.setInputFormatClass(JamesInputFormat.class);
 96 |         job.setOutputFormatClass(TextOutputFormat.class);
 97 | 
 98 |        Path inpath= new Path(input);
 99 | 		try {                                            //  input path
100 | 			FileSystem fs = inpath.getFileSystem(conf);
101 | 			FileStatus[] stats = fs.listStatus(inpath);
102 | 			for(int i=0; i<stats.length; i++)
103 | 				FileInputFormat.addInputPath(job, stats[i].getPath());
104 | 		} catch (IOException e1) {
105 | 			e1.printStackTrace();
106 | 			return;
107 | 		}
108 |         
109 |         //FileInputFormat.setInputPaths(job, new Path(input));
110 |         FileOutputFormat.setOutputPath(job, new Path(output));
111 | 
112 |         job.waitForCompletion(true);
113 |     }
114 | 	
115 | 	
116 | }
117 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/classfier/PaodingTest.java:
--------------------------------------------------------------------------------
 1 | package classfier;
 2 | 
 3 | import java.io.StringReader;
 4 | 
 5 | import org.apache.lucene.analysis.TokenStream;
 6 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 7 | 
 8 | import net.paoding.analysis.analyzer.PaodingAnalyzer;
 9 | 
10 | public class PaodingTest {
11 | 
12 | 	public static void main(String[] args) {
13 | 		// TODO Auto-generated method stub
14 |         //String line="中华人民共和国";
15 |         String line="据路透社报道，印度尼西亚社会事务部一官员星期二(29日)表示，"      
16 | 	+ "日惹市附近当地时间27日晨5时53分发生的里氏6.2级地震已经造成至少5427人死亡，"      
17 | 	+ "20000余人受伤，近20万人无家可归。";
18 |         PaodingAnalyzer analyzer=new PaodingAnalyzer();
19 |         StringReader sr=new StringReader(line);
20 |         TokenStream ts=analyzer.tokenStream("", sr);
21 |         try{
22 |         	while(ts.incrementToken()){
23 |         		CharTermAttribute ta=ts.getAttribute(CharTermAttribute.class);
24 |         		System.out.println(ta.toString());
25 |         	}
26 |         	
27 |         }catch(Exception e){
28 |         	
29 |         }
30 |         
31 | 	}
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/classfier/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package classfier;


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/.compiled/most-words-mode/.metadata:
--------------------------------------------------------------------------------
1 | #dont edit it! this file was auto generated by paoding.
2 | #Tue Dec 17 22:27:39 CST 2013
3 | paoding.analysis.compiler.checksum=24243
4 | paoding.analysis.compiler.version=2
5 | paoding.analysis.properties.files=paoding-analysis.properties;paoding-analysis-default.properties;paoding-analyzer.properties;paoding-dic-home.properties;paoding-dic-names.properties;paoding-knives.properties;paoding-knives-user.properties
6 | paoding.analysis.properties.lastModifieds=1353916802000;1353916802000;1353916802000;1386686832000;1352952482000;1353916802000;1353916802000
7 | paoding.analysis.compiler.class=net.paoding.analysis.analyzer.impl.MostWordsModeDictionariesCompiler
8 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/.compiled/most-words-mode/x-confucian-family-name.dic.compiled:
--------------------------------------------------------------------------------
  1 | 丁
  2 | 万
  3 | 上官
  4 | 丘
  5 | 东郭
  6 | 严
  7 | 丰
  8 | 乃
  9 | 乌
 10 | 乐
 11 | 乔
 12 | 习
 13 | 云
 14 | 亚
 15 | 仇
 16 | 仝
 17 | 任
 18 | 伊
 19 | 伏
 20 | 休
 21 | 伦
 22 | 伯
 23 | 何
 24 | 佘
 25 | 余
 26 | 佟
 27 | 佩
 28 | 侯
 29 | 俄
 30 | 保
 31 | 俞
 32 | 倪
 33 | 傅
 34 | 储
 35 | 元
 36 | 克
 37 | 公孙
 38 | 兰
 39 | 关
 40 | 兹
 41 | 内
 42 | 冈
 43 | 冉
 44 | 农
 45 | 冯
 46 | 况
 47 | 冷
 48 | 冼
 49 | 凌
 50 | 凤
 51 | 凯
 52 | 刀
 53 | 刁
 54 | 切
 55 | 刘
 56 | 利
 57 | 加
 58 | 努
 59 | 励
 60 | 劳
 61 | 勒
 62 | 勾
 63 | 包
 64 | 匡
 65 | 匹
 66 | 华
 67 | 卓
 68 | 单
 69 | 单于
 70 | 博
 71 | 卜
 72 | 卞
 73 | 卡
 74 | 卢
 75 | 卫
 76 | 印
 77 | 危
 78 | 厄
 79 | 厉
 80 | 及
 81 | 古
 82 | 可
 83 | 史
 84 | 叶
 85 | 司徒
 86 | 司空
 87 | 司马
 88 | 合
 89 | 吉
 90 | 吕
 91 | 吴
 92 | 周
 93 | 哈
 94 | 哥
 95 | 唐
 96 | 商
 97 | 喀
 98 | 喻
 99 | 图
100 | 土
101 | 坦
102 | 埃
103 | 基
104 | 塔
105 | 塞
106 | 墨
107 | 夏
108 | 多
109 | 大
110 | 夫
111 | 奇
112 | 奚
113 | 奥
114 | 姆
115 | 姚
116 | 姜
117 | 姬
118 | 娄
119 | 孔
120 | 孙
121 | 孟
122 | 季
123 | 安
124 | 宋
125 | 宗
126 | 官
127 | 宝
128 | 宣
129 | 宫
130 | 容
131 | 宾
132 | 寿
133 | 封
134 | 尉
135 | 小泉
136 | 尔
137 | 尤
138 | 尹
139 | 尼
140 | 居
141 | 屈
142 | 屠
143 | 岑
144 | 岳
145 | 崔
146 | 左
147 | 巫
148 | 巴
149 | 布
150 | 希
151 | 帕
152 | 常
153 | 平
154 | 幸
155 | 庄
156 | 库
157 | 应
158 | 庞
159 | 康
160 | 廉
161 | 廖
162 | 延
163 | 弗
164 | 张
165 | 强
166 | 彦
167 | 彭
168 | 徐
169 | 德
170 | 慕容
171 | 戈
172 | 成
173 | 戚
174 | 戴
175 | 房
176 | 托
177 | 拉
178 | 招
179 | 摩
180 | 敖
181 | 斐
182 | 斯
183 | 方
184 | 於
185 | 昌
186 | 明
187 | 易
188 | 晋
189 | 晏
190 | 普
191 | 曹
192 | 曼
193 | 曾
194 | 朗
195 | 朱
196 | 朴
197 | 权
198 | 李
199 | 杜
200 | 来
201 | 杨
202 | 杭
203 | 杰
204 | 林
205 | 柏
206 | 查
207 | 柯
208 | 柳
209 | 柴
210 | 根
211 | 格
212 | 桂
213 | 桑
214 | 梁
215 | 梅
216 | 森
217 | 楚
218 | 楼
219 | 樊
220 | 欧阳
221 | 武
222 | 段
223 | 殷
224 | 比
225 | 毕
226 | 毛
227 | 江
228 | 池
229 | 汤
230 | 汪
231 | 沃
232 | 沈
233 | 沙
234 | 法
235 | 波
236 | 泰
237 | 泽
238 | 洛
239 | 洪
240 | 浦
241 | 涂
242 | 淳
243 | 温
244 | 游
245 | 湛
246 | 溥
247 | 滕
248 | 满
249 | 潘
250 | 澳
251 | 澹台
252 | 烈
253 | 焦
254 | 熊
255 | 燕
256 | 爱
257 | 爱新觉罗
258 | 牛
259 | 牟
260 | 特
261 | 狄
262 | 王
263 | 班
264 | 理
265 | 瑞
266 | 瑶
267 | 瓦
268 | 甄
269 | 甘
270 | 田
271 | 申
272 | 登
273 | 白
274 | 皇甫
275 | 皮
276 | 盖
277 | 盛
278 | 瞿
279 | 石
280 | 祁
281 | 祖
282 | 祝
283 | 福
284 | 禹
285 | 禾
286 | 科
287 | 秦
288 | 程
289 | 稽
290 | 穆
291 | 空
292 | 窦
293 | 章
294 | 端
295 | 竺
296 | 简
297 | 管
298 | 米
299 | 索
300 | 累
301 | 纪
302 | 纳
303 | 练
304 | 维
305 | 缪
306 | 罗
307 | 翁
308 | 翟
309 | 翦
310 | 耶
311 | 耿
312 | 聂
313 | 胡
314 | 胥
315 | 腓
316 | 腾
317 | 臧
318 | 舍
319 | 舒
320 | 良
321 | 艾
322 | 芬
323 | 芮
324 | 花
325 | 苏
326 | 苗
327 | 苟
328 | 英
329 | 范
330 | 茅
331 | 茨
332 | 荀
333 | 荆
334 | 荣
335 | 莫
336 | 莱
337 | 萧
338 | 萨
339 | 董
340 | 蒂
341 | 蒋
342 | 蒙
343 | 蒲
344 | 蓝
345 | 蓬
346 | 蔚
347 | 蔡
348 | 薛
349 | 虞
350 | 蚁
351 | 衡
352 | 袁
353 | 裘
354 | 裴
355 | 褚
356 | 西
357 | 解
358 | 言
359 | 詹
360 | 许
361 | 诸
362 | 诸葛
363 | 诺
364 | 谈
365 | 谢
366 | 谭
367 | 谷
368 | 贝
369 | 费
370 | 贺
371 | 贾
372 | 赖
373 | 赛
374 | 赫
375 | 赵
376 | 路
377 | 辛
378 | 辜
379 | 边
380 | 达
381 | 迈
382 | 连
383 | 迟
384 | 迪
385 | 逊
386 | 邓
387 | 邝
388 | 邢
389 | 那
390 | 邬
391 | 邰
392 | 邱
393 | 邵
394 | 邹
395 | 郁
396 | 郎
397 | 郑
398 | 郝
399 | 郭
400 | 都
401 | 里
402 | 金
403 | 钟
404 | 钮
405 | 钱
406 | 银
407 | 闵
408 | 闻
409 | 阎
410 | 阮
411 | 阳
412 | 阿
413 | 陈
414 | 陶
415 | 隆
416 | 雅
417 | 雷
418 | 霍
419 | 靳
420 | 韦
421 | 韩
422 | 项
423 | 顾
424 | 颜
425 | 饶
426 | 马
427 | 骆
428 | 高
429 | 魏
430 | 鱼
431 | 鲁
432 | 鲍
433 | 鲜
434 | 麦
435 | 麻
436 | 黄
437 | 黎
438 | 黛
439 | 齐
440 | 龙
441 | 龚
442 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/.compiled/most-words-mode/x-for-combinatorics.dic.compiled:
--------------------------------------------------------------------------------
 1 | a座
 2 | a计划
 3 | b座
 4 | b计划
 5 | b超
 6 | cd机
 7 | cd盒
 8 | c座
 9 | c盘
10 | d座
11 | d盘
12 | e座
13 | e盘
14 | f座
15 | f盘
16 | g盘
17 | h盘
18 | i盘
19 | j盘
20 | k歌之王
21 | k盘
22 | q版
23 | u盘
24 | z盘
25 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/.compiled/most-words-mode/x-noise-charactor.dic.compiled:
--------------------------------------------------------------------------------
  1 | 上
  2 | 下
  3 | 不
  4 | 与
  5 | 且
  6 | 个
  7 | 为
  8 | 乃
  9 | 么
 10 | 之
 11 | 也
 12 | 了
 13 | 于
 14 | 些
 15 | 亦
 16 | 人
 17 | 今
 18 | 仍
 19 | 从
 20 | 他
 21 | 以
 22 | 们
 23 | 但
 24 | 何
 25 | 你
 26 | 使
 27 | 儿
 28 | 其
 29 | 再
 30 | 几
 31 | 凡
 32 | 凭
 33 | 则
 34 | 别
 35 | 到
 36 | 即
 37 | 却
 38 | 去
 39 | 又
 40 | 及
 41 | 另
 42 | 只
 43 | 可
 44 | 各
 45 | 同
 46 | 后
 47 | 向
 48 | 吧
 49 | 和
 50 | 咱
 51 | 哇
 52 | 哟
 53 | 哪
 54 | 啥
 55 | 啦
 56 | 嗡
 57 | 嘛
 58 | 因
 59 | 在
 60 | 她
 61 | 好
 62 | 如
 63 | 它
 64 | 小
 65 | 尔
 66 | 已
 67 | 并
 68 | 当
 69 | 往
 70 | 很
 71 | 得
 72 | 怎
 73 | 您
 74 | 我
 75 | 或
 76 | 所
 77 | 打
 78 | 把
 79 | 拿
 80 | 据
 81 | 无
 82 | 既
 83 | 是
 84 | 曾
 85 | 最
 86 | 有
 87 | 来
 88 | 某
 89 | 此
 90 | 每
 91 | 比
 92 | 沿
 93 | 用
 94 | 由
 95 | 的
 96 | 看
 97 | 着
 98 | 给
 99 | 而
100 | 自
101 | 至
102 | 致
103 | 若
104 | 虽
105 | 被
106 | 让
107 | 该
108 | 诸
109 | 谁
110 | 起
111 | 趁
112 | 距
113 | 跟
114 | 还
115 | 这
116 | 那
117 | 随
118 | 靠
119 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/.compiled/most-words-mode/x-noise-word.dic.compiled:
--------------------------------------------------------------------------------
 1 | 万一
 2 | 不只
 3 | 不论
 4 | 乃至
 5 | 什么
 6 | 什么样
 7 | 他们
 8 | 以便
 9 | 以及
10 | 但是
11 | 何以
12 | 你们
13 | 倘若
14 | 值此
15 | 全部
16 | 其次
17 | 出于
18 | 分别
19 | 反之
20 | 可以
21 | 可是
22 | 向着
23 | 咱们
24 | 哪怕
25 | 因之
26 | 图文
27 | 大家
28 | 如上
29 | 如下
30 | 如是
31 | 如若
32 | 对待
33 | 对方
34 | 尽管
35 | 尽管如此
36 | 并且
37 | 当地
38 | 彼此
39 | 得了
40 | 我们
41 | 所以
42 | 所在
43 | 所有
44 | 才能
45 | 新浪
46 | 无论
47 | 无论如何
48 | 替代
49 | 有关
50 | 有时
51 | 本地
52 | 某些
53 | 根据
54 | 格里斯
55 | 此地
56 | 此处
57 | 此次
58 | 每当
59 | 照着
60 | 用来
61 | 由此
62 | 而外
63 | 而已
64 | 自己
65 | 至于
66 | 若是
67 | 虽然
68 | 要不
69 | 要不然
70 | 这些
71 | 这样
72 | 逐步
73 | 遵循
74 | 那样
75 | 随后
76 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/.compiled/most-words-mode/x-unit.dic.compiled:
--------------------------------------------------------------------------------
  1 | 万
  2 | 世
  3 | 世纪
  4 | 两
  5 | 个
  6 | 中
  7 | 乘
  8 | 井
  9 | 亩
 10 | 人
 11 | 人工作日
 12 | 人日
 13 | 人月日
 14 | 亿
 15 | 仙
 16 | 代
 17 | 件
 18 | 份
 19 | 伏
 20 | 伏特
 21 | 位
 22 | 例
 23 | 倍
 24 | 元
 25 | 兆
 26 | 光年
 27 | 克
 28 | 党
 29 | 公顷
 30 | 册
 31 | 出
 32 | 分
 33 | 分钟
 34 | 划
 35 | 列
 36 | 刻
 37 | 剧
 38 | 包
 39 | 匹
 40 | 区
 41 | 千
 42 | 升
 43 | 单
 44 | 卫
 45 | 卷
 46 | 厂
 47 | 厅
 48 | 厨
 49 | 口
 50 | 句
 51 | 只
 52 | 台
 53 | 号
 54 | 吨
 55 | 员
 56 | 周
 57 | 周岁
 58 | 周年
 59 | 品
 60 | 回
 61 | 团
 62 | 国
 63 | 圆
 64 | 圈
 65 | 场
 66 | 坪
 67 | 堆
 68 | 堵
 69 | 声
 70 | 壶
 71 | 处
 72 | 夜
 73 | 大
 74 | 天
 75 | 头
 76 | 女
 77 | 孔
 78 | 季
 79 | 安
 80 | 安培
 81 | 宗
 82 | 室
 83 | 家
 84 | 寸
 85 | 尺
 86 | 尾
 87 | 局
 88 | 层
 89 | 届
 90 | 岁
 91 | 市
 92 | 带
 93 | 幅
 94 | 幕
 95 | 平方米
 96 | 年
 97 | 年级
 98 | 床
 99 | 店
100 | 度
101 | 座
102 | 弄
103 | 式
104 | 张
105 | 微克
106 | 微秒
107 | 微米
108 | 快
109 | 成
110 | 房
111 | 批
112 | 把
113 | 折
114 | 抽
115 | 捧
116 | 撮
117 | 支
118 | 斤
119 | 族
120 | 日
121 | 时
122 | 晚
123 | 曲
124 | 月
125 | 期
126 | 本
127 | 朵
128 | 束
129 | 条
130 | 杯
131 | 柜
132 | 栋
133 | 样
134 | 根
135 | 桌
136 | 桶
137 | 楼
138 | 次
139 | 步
140 | 段
141 | 毫
142 | 毫克
143 | 毫分
144 | 毫升
145 | 毫秒
146 | 毫米
147 | 洞
148 | 派
149 | 滴
150 | 点
151 | 片
152 | 牛
153 | 环
154 | 班
155 | 瓶
156 | 男
157 | 盏
158 | 盒
159 | 盘
160 | 种
161 | 科
162 | 秒
163 | 秒钟
164 | 立方米
165 | 站
166 | 章
167 | 笔
168 | 等
169 | 箱
170 | 米
171 | 粒
172 | 级
173 | 线
174 | 维
175 | 缸
176 | 群
177 | 翻
178 | 艘
179 | 节
180 | 英寸
181 | 行
182 | 袋
183 | 角
184 | 课
185 | 路
186 | 车
187 | 轮
188 | 辆
189 | 辈
190 | 辑
191 | 道
192 | 部
193 | 里
194 | 重
195 | 针
196 | 钱
197 | 门
198 | 间
199 | 阶
200 | 隔
201 | 集
202 | 面
203 | 页
204 | 颗
205 | 首
206 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/administrative.dic:
--------------------------------------------------------------------------------
 1 | 人事部
 2 | 信息产业部
 3 | 农业部
 4 | 医管局
 5 | 发改委
 6 | 国土资源部
 7 | 国防部
 8 | 外交部
 9 | 教育部
10 | 文化部
11 | 民政部
12 | 能源部
13 | 能源部
14 | 财政部
15 | 铁道部
16 | 防卫厅
17 | 防卫省
18 | 革命委员会


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/appellation.dic:
--------------------------------------------------------------------------------
 1 | 中队长
 2 | 主任
 3 | 主席
 4 | 军长
 5 | 医生
 6 | 博士
 7 | 厂长
 8 | 司令
 9 | 大队长
10 | 夫人
11 | 小队长
12 | 局长
13 | 师傅
14 | 师长
15 | 总统
16 | 指导
17 | 排长
18 | 教授
19 | 教练
20 | 旅长
21 | 校长
22 | 班长
23 | 秘书
24 | 组长
25 | 经理
26 | 老师
27 | 营长
28 | 董事
29 | 董事长
30 | 连长
31 | 队长
32 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/company.dic:
--------------------------------------------------------------------------------
 1 | 中国中央电视台
 2 | 中国电信有限公司
 3 | 中国移动通讯有限公司
 4 | 中国网通有限公司
 5 | 中国联合通讯有限公司
 6 | 中国联通
 7 | 中央电视台
 8 | 北京百度科技发展有限公司
 9 | 央视
10 | 电信
11 | 百度
12 | 移动
13 | 网通
14 | 联通
15 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/comupter-science.dic:
--------------------------------------------------------------------------------
1 | 主板
2 | 内存
3 | 键盘
4 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/contemporary-words.dic:
--------------------------------------------------------------------------------
1 | 支付宝
2 | 斑竹
3 | 站长
4 | 贝宝
5 | 陶宝


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/division/africa.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week7/dic/division/africa.dic


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/division/america.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week7/dic/division/america.dic


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/division/europe.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week7/dic/division/europe.dic


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/division/japan.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week7/dic/division/japan.dic


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/division/korea.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week7/dic/division/korea.dic


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/division/oceania.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week7/dic/division/oceania.dic


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/division/readme.txt:
--------------------------------------------------------------------------------
1 | 地区划分在此记录
2 | 比如中国的省市县，国外的洲、河流等


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/division/taiwan.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week7/dic/division/taiwan.dic


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/festival.dic:
--------------------------------------------------------------------------------
  1 | 七七纪念日
  2 | 七夕
  3 | 七夕情人节
  4 | 七夕节
  5 | 万圣节
  6 | 世界人权日
  7 | 世界儿歌节
  8 | 世界儿童节
  9 | 世界动物日
 10 | 世界卫生日
 11 | 世界地球日
 12 | 世界教师日
 13 | 世界无烟日
 14 | 世界无童工日
 15 | 世界林业节
 16 | 世界森林日
 17 | 世界水日
 18 | 世界海洋日
 19 | 世界湿地日
 20 | 世界献血日
 21 | 世界环境日
 22 | 世界电视日
 23 | 世界睡眠日
 24 | 世界粮食日
 25 | 世界精神卫生日
 26 | 世界红十字日
 27 | 世界问候日
 28 | 中国人民抗日战争纪念日
 29 | 中国国耻日
 30 | 中国学生营养日
 31 | 中国爱牙日
 32 | 中国爱耳日
 33 | 中国青年志愿者服务日
 34 | 中国青年节
 35 | 中秋
 36 | 中秋节
 37 | 人口日
 38 | 人权日
 39 | 儿歌节
 40 | 儿童节
 41 | 元宵
 42 | 元宵节
 43 | 元旦
 44 | 党生日
 45 | 全国中小学生安全教育日
 46 | 全国助残日
 47 | 全国爱眼日
 48 | 全国爱耳日
 49 | 六十亿人口日
 50 | 六四纪念日
 51 | 冬至
 52 | 减轻自然灾害日
 53 | 动物日
 54 | 助残日
 55 | 劳动妇女节
 56 | 劳动节
 57 | 博物馆日
 58 | 卫生日
 59 | 和平日
 60 | 国庆
 61 | 国庆节
 62 | 国耻日
 63 | 国际儿童节
 64 | 国际减轻自然灾害日
 65 | 国际劳动妇女节
 66 | 国际劳动节
 67 | 国际博物馆日
 68 | 国际和平日
 69 | 国际奥林匹克日
 70 | 国际妇女节
 71 | 国际容忍日
 72 | 国际左撇子日
 73 | 国际志愿者日
 74 | 国际护士节
 75 | 国际无车日
 76 | 国际残疾人日
 77 | 国际母语日
 78 | 国际气象节
 79 | 国际消费者权益日
 80 | 国际牛奶日
 81 | 国际盲人节
 82 | 国际禁毒日
 83 | 国际老人日
 84 | 国际臭氧层保护日
 85 | 国际非洲儿童日
 86 | 国际音乐日
 87 | 国际麻风日
 88 | 圣诞节
 89 | 地球日
 90 | 处暑
 91 | 复活节
 92 | 夏至
 93 | 大寒
 94 | 大暑
 95 | 大雪
 96 | 奥林匹克日
 97 | 妇女节
 98 | 学生营养日
 99 | 安全教育日
100 | 安全日
101 | 容忍日
102 | 寒露
103 | 小寒
104 | 小年
105 | 小暑
106 | 小满
107 | 小雪
108 | 左撇子日
109 | 平安夜
110 | 建党日
111 | 建军节
112 | 志愿人员日
113 | 志愿者日
114 | 情人节
115 | 惊蛰
116 | 愚人节
117 | 感恩节
118 | 扫房日
119 | 抗日战争纪念日
120 | 抗日纪念日
121 | 护士节
122 | 教师日
123 | 教师节
124 | 文化遗产日
125 | 无烟日
126 | 无童工日
127 | 无车日
128 | 春分
129 | 春节
130 | 植树节
131 | 残疾人日
132 | 母亲节
133 | 母语日
134 | 气象节
135 | 水日
136 | 海洋日
137 | 消费者权益日
138 | 清明
139 | 清明节
140 | 湿地日
141 | 爱牙日
142 | 爱眼日
143 | 爱耳日
144 | 父亲节
145 | 牛奶日
146 | 独立日
147 | 献血日
148 | 环境日
149 | 电视日
150 | 白露
151 | 盲人节
152 | 睡眠日
153 | 秋分
154 | 立冬
155 | 立夏
156 | 立春
157 | 立秋
158 | 端午节
159 | 粮食日
160 | 精神卫生日
161 | 红十字日
162 | 老人日
163 | 联合国日
164 | 腊八节
165 | 腊日
166 | 臭氧保护日
167 | 臭氧层保护日
168 | 芒种
169 | 营养日
170 | 谷雨
171 | 重阳
172 | 重阳节
173 | 问候日
174 | 除夕
175 | 雨水
176 | 霜降
177 | 青年志愿者服务日
178 | 青年节
179 | 非洲儿童日
180 | 音乐日
181 | 麻风日
182 | 龙头节
183 | -182
184 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/language.dic:
--------------------------------------------------------------------------------
 1 | 中文
 2 | 台湾话
 3 | 台语
 4 | 客家话
 5 | 汉字
 6 | 汉语
 7 | 法文
 8 | 法语
 9 | 福建话
10 | 粤语
11 | 美语
12 | 英文
13 | 英语
14 | 西班牙语
15 | 闽南语
16 | -15


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/locale/beijing.dic:
--------------------------------------------------------------------------------
 1 | 健翔桥
 2 | 北医大
 3 | 四惠东
 4 | 复兴门
 5 | 天安门
 6 | 德胜门
 7 | 德胜门西
 8 | 新街口
 9 | 朝阳门
10 | 正阳门
11 | 水立方
12 | 积水潭
13 | 积水潭桥
14 | 苹果园
15 | 西直门
16 | 长安街
17 | -15
18 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/locale/quanzhou.dic:
--------------------------------------------------------------------------------
1 | 东西塔
2 | 崇武
3 | 惠安
4 | 洛阳桥
5 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/locale/readme.txt:
--------------------------------------------------------------------------------
1 | 各地方街道等在此录入


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/locale/xiamen.dic:
--------------------------------------------------------------------------------
1 | 思明区


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/name-foreign.dic:
--------------------------------------------------------------------------------
 1 | 亚历山大
 2 | 克林顿
 3 | 克里斯汀
 4 | 布什
 5 | 布莱尔
 6 | 科特勒
 7 | 约翰
 8 | 约翰逊
 9 | 蒂娜
10 | -11
11 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/nation.dic:
--------------------------------------------------------------------------------
 1 | 东非
 2 | 中华
 3 | 中华 
 4 | 中华人民共和国
 5 | 中华民国
 6 | 中国
 7 | 中国
 8 | 中非
 9 | 乌克兰
10 | 也门
11 | 以色列
12 | 伊拉克
13 | 伊朗
14 | 俄罗斯
15 | 分类
16 | 加拿大
17 | 南非
18 | 古巴
19 | 台湾
20 | 埃及
21 | 塞尔维亚
22 | 墨西哥
23 | 威尔士
24 | 尼日利亚
25 | 巴比伦
26 | 希腊
27 | 德国
28 | 德意志
29 | 意大利
30 | 捷克
31 | 日本
32 | 朝鲜
33 | 比利时
34 | 法兰西
35 | 法国
36 | 波兰
37 | 波黑
38 | 瑞典
39 | 瑞士
40 | 白俄罗斯
41 | 缅甸
42 | 美利坚
43 | 美利坚合众国
44 | 美国
45 | 老挝
46 | 苏格兰
47 | 苏联
48 | 英国
49 | 英格兰
50 | 葡萄牙
51 | 蒙古
52 | 西班牙
53 | 越南
54 | 韩国
55 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/org-domestic.dic:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bellonor/myHadoopProject/5b7f551dda6daf86489eeed8370d868a90a5cb8e/com.homework/src/week7/dic/org-domestic.dic


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/org-foreign.dic:
--------------------------------------------------------------------------------
 1 | 上海合作组织
 2 | 世卫
 3 | 世界卫生组织
 4 | 世界银行
 5 | 东盟
 6 | 亚太经合组织
 7 | 人权理事会
 8 | 六方会谈
 9 | 北约
10 | 哈马斯
11 | 安全理事会
12 | 安理会
13 | 欧佩克
14 | 红十字会
15 | 联合国
16 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/paoding-dic-names.properties:
--------------------------------------------------------------------------------
 1 | #dictionary character encoding
 2 | #paoding.dic.charset=UTF-8
 3 | 
 4 | ### Set maximum word length (Chinese character) that analyzer can support. Longer words will be ignored.
 5 | ### By default, it is set to "0", which means all words will be analyzed.
 6 | #paoding.dic.maxWordLen=0
 7 | 
 8 | #dictionaries which are skip
 9 | #paoding.dic.skip.prefix=x-
10 | 
11 | #chinese/cjk charactors that will not token
12 | #paoding.dic.noise-charactor=x-noise-charactor
13 | 
14 | #chinese/cjk words that will not token
15 | #paoding.dic.noise-word=x-noise-word
16 | 
17 | #unit words, like "ge", "zhi", ...
18 | #paoding.dic.unit=x-unit
19 | 
20 | #like "Wang", "Zhang", ...
21 | #paoding.dic.confucian-family-name=x-confucian-family-name
22 | 
23 | #linke "uPAN", "cdHE"
24 | #paoding.dic.for-combinatorics=x-for-combinatorics
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/star-domestic.dic:
--------------------------------------------------------------------------------
 1 | 丁俊辉
 2 | 乾隆
 3 | 刘德华
 4 | 刘翔
 5 | 华仔
 6 | 周杰伦
 7 | 姚明
 8 | 小丁
 9 | 小辉
10 | 庖丁
11 | 康熙
12 | 张学友
13 | 朱军
14 | 朱德
15 | 朱德茂
16 | 朱镕基
17 | 李世民
18 | 李瑞环
19 | 武则天
20 | 毛主席
21 | 毛泽东
22 | 江泽民
23 | 老许
24 | 胡志明
25 | 胡锦涛
26 | 许静蕾
27 | 诸葛亮
28 | 赵本山
29 | 陈佩斯
30 | 马云
31 | 马加爵
32 | -30
33 | #历史、政治、学术、企业、娱乐、体育、社会现象
34 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/star-foreign.dic:
--------------------------------------------------------------------------------
1 | 比尔
2 | 盖茨
3 | -2
4 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/x-confucian-family-name.dic:
--------------------------------------------------------------------------------
  1 | 丁
  2 | 万
  3 | 上官
  4 | 丘
  5 | 东郭
  6 | 严
  7 | 丰
  8 | 乃
  9 | 乌
 10 | 乐
 11 | 乔
 12 | 习
 13 | 云
 14 | 亚
 15 | 什
 16 | 仇
 17 | 仝
 18 | 任
 19 | 伊
 20 | 伍
 21 | 伏
 22 | 休
 23 | 伦
 24 | 伯
 25 | 何
 26 | 佘
 27 | 余
 28 | 佟
 29 | 佩
 30 | 侯
 31 | 俄
 32 | 保
 33 | 俞
 34 | 倪
 35 | 傅
 36 | 储
 37 | 元
 38 | 克
 39 | 公孙
 40 | 兰
 41 | 关
 42 | 兹
 43 | 内
 44 | 冈
 45 | 冉
 46 | 农
 47 | 冯
 48 | 况
 49 | 冷
 50 | 冼
 51 | 凌
 52 | 凤
 53 | 凯
 54 | 刀
 55 | 刁
 56 | 切
 57 | 刘
 58 | 利
 59 | 加
 60 | 努
 61 | 励
 62 | 劳
 63 | 勒
 64 | 勾
 65 | 包
 66 | 匡
 67 | 匹
 68 | 华
 69 | 卓
 70 | 单
 71 | 单于
 72 | 博
 73 | 卜
 74 | 卞
 75 | 卡
 76 | 卢
 77 | 卫
 78 | 印
 79 | 危
 80 | 厄
 81 | 厉
 82 | 及
 83 | 古
 84 | 可
 85 | 史
 86 | 叶
 87 | 司徒
 88 | 司空
 89 | 司马
 90 | 合
 91 | 吉
 92 | 吕
 93 | 吴
 94 | 周
 95 | 哈
 96 | 哥
 97 | 唐
 98 | 商
 99 | 喀
100 | 喻
101 | 图
102 | 土
103 | 坦
104 | 埃
105 | 基
106 | 塔
107 | 塞
108 | 墨
109 | 夏
110 | 多
111 | 大
112 | 夫
113 | 奇
114 | 奚
115 | 奥
116 | 姆
117 | 姚
118 | 姜
119 | 姬
120 | 娄
121 | 孔
122 | 孙
123 | 孟
124 | 季
125 | 安
126 | 宋
127 | 宗
128 | 官
129 | 宝
130 | 宣
131 | 宫
132 | 容
133 | 宾
134 | 寿
135 | 封
136 | 尉
137 | 小泉
138 | 尔
139 | 尤
140 | 尹
141 | 尼
142 | 居
143 | 屈
144 | 屠
145 | 岑
146 | 岳
147 | 崔
148 | 左
149 | 巫
150 | 巴
151 | 布
152 | 希
153 | 帕
154 | 常
155 | 平
156 | 幸
157 | 庄
158 | 库
159 | 应
160 | 庞
161 | 康
162 | 廉
163 | 廖
164 | 延
165 | 弗
166 | 张
167 | 强
168 | 彦
169 | 彭
170 | 徐
171 | 德
172 | 慕容
173 | 戈
174 | 成
175 | 戚
176 | 戴
177 | 房
178 | 托
179 | 拉
180 | 招
181 | 摩
182 | 敖
183 | 斐
184 | 斯
185 | 方
186 | 於
187 | 昌
188 | 明
189 | 易
190 | 晋
191 | 晏
192 | 普
193 | 曹
194 | 曼
195 | 曾
196 | 朗
197 | 朱
198 | 朴
199 | 权
200 | 李
201 | 杜
202 | 来
203 | 杨
204 | 杭
205 | 杰
206 | 林
207 | 柏
208 | 查
209 | 柯
210 | 柳
211 | 柴
212 | 根
213 | 格
214 | 桂
215 | 桑
216 | 梁
217 | 梅
218 | 森
219 | 楚
220 | 楼
221 | 樊
222 | 欧阳
223 | 武
224 | 段
225 | 殷
226 | 比
227 | 毕
228 | 毛
229 | 江
230 | 池
231 | 汤
232 | 汪
233 | 沃
234 | 沈
235 | 沙
236 | 法
237 | 波
238 | 泰
239 | 泽
240 | 洛
241 | 洪
242 | 浦
243 | 涂
244 | 淳
245 | 温
246 | 游
247 | 湛
248 | 溥
249 | 滕
250 | 满
251 | 潘
252 | 澳
253 | 澹台
254 | 烈
255 | 焦
256 | 熊
257 | 燕
258 | 爱
259 | 爱新觉罗
260 | 牛
261 | 牟
262 | 特
263 | 狄
264 | 王
265 | 班
266 | 理
267 | 瑞
268 | 瑶
269 | 瓦
270 | 甄
271 | 甘
272 | 田
273 | 申
274 | 登
275 | 白
276 | 皇甫
277 | 皮
278 | 盖
279 | 盛
280 | 瞿
281 | 石
282 | 祁
283 | 祖
284 | 祝
285 | 福
286 | 禹
287 | 禾
288 | 科
289 | 秦
290 | 程
291 | 稽
292 | 穆
293 | 空
294 | 窦
295 | 章
296 | 端
297 | 竺
298 | 简
299 | 管
300 | 米
301 | 索
302 | 累
303 | 纪
304 | 纳
305 | 练
306 | 维
307 | 缪
308 | 罗
309 | 翁
310 | 翟
311 | 翦
312 | 耶
313 | 耿
314 | 聂
315 | 胡
316 | 胥
317 | 腓
318 | 腾
319 | 臧
320 | 舍
321 | 舒
322 | 良
323 | 艾
324 | 芬
325 | 芮
326 | 花
327 | 苏
328 | 苗
329 | 苟
330 | 英
331 | 范
332 | 茅
333 | 茨
334 | 荀
335 | 荆
336 | 荣
337 | 莫
338 | 莱
339 | 萧
340 | 萨
341 | 董
342 | 蒂
343 | 蒋
344 | 蒙
345 | 蒲
346 | 蓝
347 | 蓬
348 | 蔚
349 | 蔡
350 | 薛
351 | 虞
352 | 蚁
353 | 衡
354 | 袁
355 | 裘
356 | 裴
357 | 褚
358 | 西
359 | 解
360 | 言
361 | 詹
362 | 许
363 | 诸
364 | 诸葛
365 | 诺
366 | 谈
367 | 谢
368 | 谭
369 | 谷
370 | 贝
371 | 费
372 | 贺
373 | 贾
374 | 赖
375 | 赛
376 | 赫
377 | 赵
378 | 路
379 | 辛
380 | 辜
381 | 边
382 | 达
383 | 迈
384 | 连
385 | 迟
386 | 迪
387 | 逊
388 | 邓
389 | 邝
390 | 邢
391 | 那
392 | 邬
393 | 邰
394 | 邱
395 | 邵
396 | 邹
397 | 郁
398 | 郎
399 | 郑
400 | 郝
401 | 郭
402 | 都
403 | 里
404 | 金
405 | 钟
406 | 钮
407 | 钱
408 | 银
409 | 闵
410 | 闻
411 | 阎
412 | 阮
413 | 阳
414 | 阿
415 | 陆
416 | 陈
417 | 陶
418 | 隆
419 | 雅
420 | 雷
421 | 霍
422 | 靳
423 | 韦
424 | 韩
425 | 项
426 | 顾
427 | 颜
428 | 饶
429 | 马
430 | 骆
431 | 高
432 | 魏
433 | 鱼
434 | 鲁
435 | 鲍
436 | 鲜
437 | 麦
438 | 麻
439 | 黄
440 | 黎
441 | 黛
442 | 齐
443 | 龙
444 | 龚
445 | -444
446 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/x-for-combinatorics.dic:
--------------------------------------------------------------------------------
 1 | ﻿U盘
 2 | CD盒
 3 | CD机
 4 | C盘
 5 | D盘
 6 | E盘
 7 | F盘
 8 | G盘
 9 | H盘
10 | I盘
11 | J盘
12 | K盘
13 | Z盘
14 | K歌之王
15 | A座
16 | B座
17 | C座
18 | D座
19 | E座
20 | F座
21 | A计划
22 | B计划
23 | B超
24 | Q版
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/x-noise-charactor.dic:
--------------------------------------------------------------------------------
  1 | ﻿的
  2 | 一
  3 | 不
  4 | 在
  5 | 人
  6 | 有
  7 | 是
  8 | 为
  9 | 以
 10 | 于
 11 | 上
 12 | 他
 13 | 而
 14 | 后
 15 | 之
 16 | 来
 17 | 及
 18 | 了
 19 | 因
 20 | 下
 21 | 可
 22 | 到
 23 | 由
 24 | 这
 25 | 与
 26 | 也
 27 | 此
 28 | 但
 29 | 并
 30 | 个
 31 | 其
 32 | 已
 33 | 无
 34 | 小
 35 | 我
 36 | 们
 37 | 起
 38 | 最
 39 | 再
 40 | 今
 41 | 去
 42 | 好
 43 | 只
 44 | 又
 45 | 或
 46 | 很
 47 | 亦
 48 | 某
 49 | 把
 50 | 那
 51 | 你
 52 | 乃
 53 | 它
 54 | 吧
 55 | 被
 56 | 比
 57 | 别
 58 | 趁
 59 | 当
 60 | 从
 61 | 到
 62 | 得
 63 | 打
 64 | 凡
 65 | 儿
 66 | 尔
 67 | 该
 68 | 各
 69 | 给
 70 | 跟
 71 | 和
 72 | 何
 73 | 还
 74 | 即
 75 | 几
 76 | 既
 77 | 看
 78 | 据
 79 | 距
 80 | 靠
 81 | 啦
 82 | 了
 83 | 另
 84 | 么
 85 | 每
 86 | 们
 87 | 嘛
 88 | 拿
 89 | 哪
 90 | 那
 91 | 您
 92 | 凭
 93 | 且
 94 | 却
 95 | 让
 96 | 仍
 97 | 啥
 98 | 如
 99 | 若
100 | 使
101 | 谁
102 | 虽
103 | 随
104 | 同
105 | 所
106 | 她
107 | 哇
108 | 嗡
109 | 往
110 | 哪
111 | 些
112 | 向
113 | 沿
114 | 哟
115 | 用
116 | 于
117 | 咱
118 | 则
119 | 怎
120 | 曾
121 | 至
122 | 致
123 | 着
124 | 诸
125 | 自


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/x-noise-word.dic:
--------------------------------------------------------------------------------
 1 | ﻿你们
 2 | 我们
 3 | 他们
 4 | 那样
 5 | 所以
 6 | 得了
 7 | 当地
 8 | 有关
 9 | 所有
10 | 因之
11 | 用来
12 | 所在
13 | 对待
14 | 而外
15 | 分别
16 | 某些
17 | 对方
18 | 不只
19 | 虽然
20 | 无论
21 | 不论
22 | 无论如何
23 | 但是
24 | 全部
25 | 尽管
26 | 大家
27 | 以便
28 | 自己
29 | 可是
30 | 反之
31 | 这些
32 | 什么
33 | 由此
34 | 万一
35 | 而已
36 | 何以
37 | 咱们
38 | 值此
39 | 向着
40 | 哪怕
41 | 倘若
42 | 出于
43 | 如上
44 | 如若
45 | 替代
46 | 什么样
47 | 如是
48 | 照着
49 | 此处
50 | 这样
51 | 每当
52 | 此次
53 | 至于
54 | 此地
55 | 要不然
56 | 逐步
57 | 格里斯
58 | 本地
59 | 要不
60 | 其次
61 | 尽管如此
62 | 遵循
63 | 乃至
64 | 若是
65 | 并且
66 | 如下
67 | 可以
68 | 才能
69 | 以及
70 | 彼此
71 | 根据
72 | 随后
73 | 有时
74 | 图文
75 | 新浪


--------------------------------------------------------------------------------
/com.homework/src/week7/dic/x-unit.dic:
--------------------------------------------------------------------------------
  1 | 万
  2 | 世
  3 | 世纪
  4 | 两
  5 | 个
  6 | 中
  7 | 乘
  8 | 井
  9 | 亩
 10 | 人
 11 | 人工作日
 12 | 人日
 13 | 人月日
 14 | 亿
 15 | 仙
 16 | 代
 17 | 件
 18 | 份
 19 | 伏
 20 | 伏特
 21 | 位
 22 | 例
 23 | 倍
 24 | 元
 25 | 兆
 26 | 光年
 27 | 克
 28 | 党
 29 | 公顷
 30 | 册
 31 | 出
 32 | 分
 33 | 分钟
 34 | 划
 35 | 列
 36 | 刻
 37 | 剧
 38 | 包
 39 | 匹
 40 | 区
 41 | 千
 42 | 升
 43 | 单
 44 | 卫
 45 | 卷
 46 | 厂
 47 | 厅
 48 | 厨
 49 | 口
 50 | 句
 51 | 只
 52 | 台
 53 | 号
 54 | 吨
 55 | 员
 56 | 周
 57 | 周岁
 58 | 周年
 59 | 品
 60 | 回
 61 | 团
 62 | 国
 63 | 圆
 64 | 圈
 65 | 场
 66 | 坪
 67 | 堆
 68 | 堵
 69 | 声
 70 | 壶
 71 | 处
 72 | 夜
 73 | 大
 74 | 天
 75 | 头
 76 | 女
 77 | 孔
 78 | 季
 79 | 安
 80 | 安培
 81 | 宗
 82 | 室
 83 | 家
 84 | 寸
 85 | 尺
 86 | 尾
 87 | 局
 88 | 层
 89 | 届
 90 | 岁
 91 | 市
 92 | 带
 93 | 幅
 94 | 幕
 95 | 平方米
 96 | 年
 97 | 年级
 98 | 床
 99 | 店
100 | 度
101 | 座
102 | 弄
103 | 式
104 | 张
105 | 微克
106 | 微秒
107 | 微米
108 | 快
109 | 成
110 | 房
111 | 批
112 | 把
113 | 折
114 | 抽
115 | 捧
116 | 撮
117 | 支
118 | 斤
119 | 族
120 | 日
121 | 时
122 | 晚
123 | 曲
124 | 月
125 | 期
126 | 本
127 | 朵
128 | 束
129 | 条
130 | 杯
131 | 柜
132 | 栋
133 | 样
134 | 根
135 | 桌
136 | 桶
137 | 楼
138 | 次
139 | 步
140 | 段
141 | 毫
142 | 毫克
143 | 毫分
144 | 毫升
145 | 毫秒
146 | 毫米
147 | 洞
148 | 派
149 | 滴
150 | 点
151 | 片
152 | 牛
153 | 环
154 | 班
155 | 瓶
156 | 男
157 | 盏
158 | 盒
159 | 盘
160 | 种
161 | 科
162 | 秒
163 | 秒钟
164 | 立方米
165 | 站
166 | 章
167 | 笔
168 | 等
169 | 箱
170 | 米
171 | 粒
172 | 级
173 | 线
174 | 维
175 | 缸
176 | 群
177 | 翻
178 | 艘
179 | 节
180 | 英寸
181 | 行
182 | 袋
183 | 角
184 | 课
185 | 路
186 | 车
187 | 轮
188 | 辆
189 | 辈
190 | 辑
191 | 道
192 | 部
193 | 里
194 | 重
195 | 针
196 | 钱
197 | 门
198 | 间
199 | 阶
200 | 隔
201 | 集
202 | 面
203 | 页
204 | 颗
205 | 首
206 | -205
207 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/myInputFormat/JamesInputFormat.java:
--------------------------------------------------------------------------------
 1 | package myInputFormat;
 2 | import java.io.IOException;
 3 | 
 4 | import org.apache.hadoop.io.BytesWritable;
 5 | import org.apache.hadoop.io.LongWritable;
 6 | import org.apache.hadoop.mapreduce.InputSplit;
 7 | import org.apache.hadoop.mapreduce.RecordReader;
 8 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
 9 | import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
10 | import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
11 | import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
12 | public class JamesInputFormat extends CombineFileInputFormat<LongWritable, BytesWritable>{
13 | 	@Override
14 | 	public RecordReader<LongWritable, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException {
15 | 
16 | 		CombineFileSplit combineFileSplit = (CombineFileSplit) split;
17 | 		CombineFileRecordReader<LongWritable, BytesWritable> recordReader = new CombineFileRecordReader<LongWritable, BytesWritable>(combineFileSplit, context, JamesRecordReader.class);
18 | 		try {
19 | 			recordReader.initialize(combineFileSplit, context);
20 | 		} catch (InterruptedException e) {
21 | 			new RuntimeException("Error to initialize CombineSmallfileRecordReader.");
22 | 		}
23 | 		return recordReader;
24 | 	}
25 | }
26 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/myInputFormat/JamesRecordReader.java:
--------------------------------------------------------------------------------
 1 | package myInputFormat;
 2 | import java.io.IOException;
 3 | 
 4 | import org.apache.hadoop.fs.Path;
 5 | import org.apache.hadoop.io.BytesWritable;
 6 | import org.apache.hadoop.io.LongWritable;
 7 | import org.apache.hadoop.mapreduce.InputSplit;
 8 | import org.apache.hadoop.mapreduce.RecordReader;
 9 | import org.apache.hadoop.mapreduce.TaskAttemptContext;
10 | import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
11 | import org.apache.hadoop.mapreduce.lib.input.FileSplit;
12 | import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
13 | public class JamesRecordReader extends RecordReader<LongWritable, BytesWritable>{
14 | 	private CombineFileSplit combineFileSplit;
15 | 	private LineRecordReader lineRecordReader = new LineRecordReader();
16 | 	private Path[] paths;
17 | 	private int totalLength;
18 | 	private int currentIndex;
19 | 	private float currentProgress = 0;
20 | 	private LongWritable currentKey;
21 | 	private BytesWritable currentValue = new BytesWritable();;
22 | 
23 | 	public JamesRecordReader(CombineFileSplit combineFileSplit, TaskAttemptContext context, Integer index) throws IOException {
24 | 		super();
25 | 		this.combineFileSplit = combineFileSplit;
26 | 		this.currentIndex = index; // 当前要处理的小文件Block在CombineFileSplit中的索引
27 | 	}
28 | 
29 | 	@Override
30 | 	public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
31 | 		this.combineFileSplit = (CombineFileSplit) split;
32 | 		// 处理CombineFileSplit中的一个小文件Block，因为使用LineRecordReader，需要构造一个FileSplit对象，然后才能够读取数据
33 | 		FileSplit fileSplit = new FileSplit(combineFileSplit.getPath(currentIndex), combineFileSplit.getOffset(currentIndex), combineFileSplit.getLength(currentIndex), combineFileSplit.getLocations());
34 | 		lineRecordReader.initialize(fileSplit, context);
35 | 
36 | 		this.paths = combineFileSplit.getPaths();
37 | 		totalLength = paths.length;
38 | 		context.getConfiguration().set("map.input.file.name", combineFileSplit.getPath(currentIndex).getName());
39 | 	}
40 | 
41 | 	@Override
42 | 	public LongWritable getCurrentKey() throws IOException, InterruptedException {
43 | 		currentKey = lineRecordReader.getCurrentKey();
44 | 		return currentKey;
45 | 	}
46 | 
47 | 	@Override
48 | 	public BytesWritable getCurrentValue() throws IOException, InterruptedException {
49 | 		byte[] content = lineRecordReader.getCurrentValue().getBytes();
50 | 		currentValue.set(content, 0, content.length);
51 | 		return currentValue;
52 | 	}
53 | 
54 | 	@Override
55 | 	public boolean nextKeyValue() throws IOException, InterruptedException {
56 | 		if (currentIndex >= 0 && currentIndex < totalLength) {
57 | 			return lineRecordReader.nextKeyValue();
58 | 		} else {
59 | 			return false;
60 | 		}
61 | 	}
62 | 
63 | 	@Override
64 | 	public float getProgress() throws IOException {
65 | 		if (currentIndex >= 0 && currentIndex < totalLength) {
66 | 			currentProgress = (float) currentIndex / totalLength;
67 | 			return currentProgress;
68 | 		}
69 | 		return currentProgress;
70 | 	}
71 | 
72 | 	@Override
73 | 	public void close() throws IOException {
74 | 		lineRecordReader.close();
75 | 	}
76 | }
77 | 


--------------------------------------------------------------------------------
/com.homework/src/week7/myInputFormat/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package myInputFormat;


--------------------------------------------------------------------------------
/com.homework/src/week8/mrclassify/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 |  * 见scripts文件夹week8.rar文件，由于是用 mahout0.6写的，放在0.8的环境中不支持,git测试
3 |  */
4 | /**
5 |  * @author Administrator
6 |  *
7 |  */
8 | package mrclassify;


--------------------------------------------------------------------------------