├── .gitignore ├── sparkML ├── src │ └── main │ │ ├── factory │ │ ├── Algorithm.scala │ │ ├── AlgTrait.scala │ │ └── AlgorithmFactory.scala │ │ ├── optimizer │ │ ├── optimizer.scala │ │ └── FTRLProximal.scala │ │ ├── recommender │ │ ├── Recommender.scala │ │ ├── ALSRec.scala │ │ └── SlopOneRec.scala │ │ ├── app.scala │ │ ├── input │ │ ├── DataHolder.scala │ │ ├── recommend │ │ │ ├── RecDataHolder.scala │ │ │ ├── YahooDataHolder.scala │ │ │ └── NetflixDataHolder.scala │ │ ├── LRDataHolder.scala │ │ └── DataFactory.scala │ │ ├── Classifier │ │ ├── RegressionModel.scala │ │ └── LRWithFTRL.scala │ │ ├── util │ │ ├── SparkEnv.scala │ │ ├── Conf.scala │ │ └── MainHolder.scala │ │ └── linalg │ │ └── algUtil.scala └── sparkML.iml ├── README.md ├── LICENSE └── bash └── splitDataset.py /.gitignore: -------------------------------------------------------------------------------- 1 | */lib/ 2 | */out/ 3 | */META-INF/ 4 | */scala-train/ 5 | */mlTrains/ 6 | */.idea/ 7 | -------------------------------------------------------------------------------- /sparkML/src/main/factory/Algorithm.scala: -------------------------------------------------------------------------------- 1 | package main.factory 2 | 3 | /** 4 | * Created by zhy on 2015/8/2 0002. 5 | */ 6 | trait Algorithm extends RMSE with Serializable -------------------------------------------------------------------------------- /sparkML/src/main/optimizer/optimizer.scala: -------------------------------------------------------------------------------- 1 | package main.optimizer 2 | 3 | /** 4 | * Created by zhy on 2015/8/1 0001. 5 | */ 6 | 7 | trait Optimizer extends Serializable 8 | -------------------------------------------------------------------------------- /sparkML/src/main/recommender/Recommender.scala: -------------------------------------------------------------------------------- 1 | package main.recommender 2 | 3 | import main.factory.Algorithm 4 | 5 | /** 6 | * Created by zhy on 2015/7/19 0019. 7 | */ 8 | 9 | class Recommender extends Algorithm -------------------------------------------------------------------------------- /sparkML/src/main/app.scala: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import main.util.{Conf, MainHolder} 4 | 5 | /** 6 | * Created by zhy on 2015/7/19 0019. 7 | */ 8 | object app extends App { 9 | 10 | override def main(args: Array[String]) { 11 | val opt = new Conf(args) 12 | 13 | MainHolder.setUp(opt) 14 | 15 | MainHolder.calculateRMSE 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /sparkML/src/main/input/DataHolder.scala: -------------------------------------------------------------------------------- 1 | package main.input 2 | 3 | import org.apache.spark.mllib.recommendation.Rating 4 | import org.apache.spark.mllib.regression.LabeledPoint 5 | import org.apache.spark.rdd.RDD 6 | 7 | /** 8 | * Created by zhy on 2015/8/3 0003. 
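 * Common contract for every data source: recommender holders return an RDD[Rating] from getData,
 * classification holders return an RDD[LabeledPoint] from getLRData, and getDataDesc prints a short
 * summary of the loaded data; the method a concrete holder does not support is left unimplemented (???).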
9 | */ 10 | trait DataHolder extends Serializable { 11 | def getLRData(): RDD[LabeledPoint] 12 | 13 | def getData(): RDD[Rating] 14 | 15 | def getDataDesc: Unit 16 | } 17 | -------------------------------------------------------------------------------- /sparkML/src/main/Classifier/RegressionModel.scala: -------------------------------------------------------------------------------- 1 | package main.classifier 2 | 3 | import main.factory.Algorithm 4 | import main.optimizer.Optimizer 5 | import org.apache.spark.mllib.regression.LabeledPoint 6 | import org.apache.spark.rdd.RDD 7 | 8 | /** 9 | * Created by zhy on 2015/8/2 0002. 10 | */ 11 | 12 | /** 13 | * Regression model 14 | */ 15 | trait RegressionModel extends Algorithm with Serializable { 16 | 17 | //Optimization algorithm 18 | def optimizer: Optimizer 19 | 20 | //Training and prediction 21 | def train(trainData: RDD[LabeledPoint]): Unit 22 | } 23 | -------------------------------------------------------------------------------- /sparkML/src/main/util/SparkEnv.scala: -------------------------------------------------------------------------------- 1 | package main.util 2 | 3 | /** 4 | * Created by zhy on 2015/7/18 0018. 5 | */ 6 | 7 | import org.apache.log4j.{Level, Logger} 8 | import org.apache.spark.{SparkConf, SparkContext} 9 | 10 | /** 11 | * Initializes the shared SparkContext 12 | */ 13 | object SparkEnv { 14 | 15 | Logger.getLogger("org.apache.spark").setLevel(Level.WARN) 16 | Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF) 17 | 18 | val conf = new SparkConf().setAppName("MachineLearningInSpark").setMaster("local[2]") 19 | val sc = new SparkContext(conf) 20 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Machine Learning In Spark 2 | 3 | A scalable machine learning system written in [Scala](http://www.scala-lang.org/) on top of the [Apache Spark framework](https://spark.apache.org/). The framework design is adapted from [OndraFiedler/spark-recommender](https://github.com/OndraFiedler/spark-recommender). 4 | 5 | ## Features 6 | 7 | An ML framework for implementing and developing ML algorithms that run on Spark. 8 | 9 | ### Implemented Algorithms 10 | 11 | - 1.ALS from Spark MLlib 12 | - 2.Slop-One 13 | - 3.Logistic Regression with FTRL-Proximal 14 | 15 | ### Datasets 16 | 17 | - 1.NetFlix 18 | - 2.Yahoo 19 | - 3.Logistic Regression Dataset -------------------------------------------------------------------------------- /sparkML/src/main/linalg/algUtil.scala: -------------------------------------------------------------------------------- 1 | package main.linalg 2 | 3 | import breeze.linalg.{SparseVector => BSV, Vector => BV} 4 | import org.apache.spark.mllib.linalg.{SparseVector, Vector} 5 | 6 | /** 7 | * Created by zhy on 2015/8/2 0002.
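 * Conversion helpers from MLlib vectors to Breeze sparse vectors, so that weights kept as
 * breeze.linalg.SparseVector (e.g. in LRWithFTRL) can be combined with MLlib feature vectors.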
8 | */ 9 | object AlgUtil { 10 | /** 11 | * 向量->Breeze向量 12 | * @param v Vector 13 | * @return Breeze Vector 14 | */ 15 | def VtoB(v: Vector): BV[Double] = 16 | new BSV[Double](v.toSparse.indices, v.toSparse.values, v.toSparse.size) 17 | 18 | /** 19 | * 稀疏向量->Breeze向量 20 | * @param v SparseVector 21 | * @return Breeze Vector 22 | */ 23 | def StoB(v: SparseVector): BV[Double] = new BSV[Double](v.indices, v.values, v.size) 24 | } 25 | -------------------------------------------------------------------------------- /sparkML/sparkML.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /sparkML/src/main/input/recommend/RecDataHolder.scala: -------------------------------------------------------------------------------- 1 | package main.input.recommend 2 | 3 | import main.input.DataHolder 4 | import org.apache.spark.mllib.recommendation.Rating 5 | import org.apache.spark.rdd.RDD 6 | 7 | /** 8 | * Created by zhy on 2015/7/18 0018. 9 | */ 10 | 11 | /** 12 | * 推荐算法数据接口,可获取相应的Rating和ID2Name映射 13 | */ 14 | 15 | trait RecDataHolder extends DataHolder with Serializable { 16 | protected val ratings: RDD[Rating] 17 | protected val productsIDsToNameMap: Map[Int, String] 18 | 19 | override def getLRData = ??? 20 | 21 | override def getData = getRatings 22 | 23 | override def getDataDesc = printRatingDesc 24 | 25 | def getRatings(): RDD[Rating] = ratings 26 | 27 | def printRatingDesc = println("数据集包含 " + ratings.count + " 条数据,来自 " 28 | + ratings.map(_.user).distinct.count + " 个用户和 " + ratings.map(_.product).distinct.count + "件产品") 29 | 30 | def getIDToProductnameMap(): Map[Int, String] = productsIDsToNameMap 31 | 32 | def getNumOfProducts(): Int = productsIDsToNameMap.keys.max + 1 33 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Ondra Fiedler 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /sparkML/src/main/util/Conf.scala: -------------------------------------------------------------------------------- 1 | package main.util 2 | 3 | import main.factory.AlgorithmFactory 4 | import main.input.DataFactory 5 | import org.rogach.scallop.ScallopConf 6 | 7 | /** 8 | * Created by zhy on 2015/7/19 0019. 9 | */ 10 | 11 | /** 12 | * 命令行参数解析类 13 | * @param arguments 命令行参数 14 | */ 15 | class Conf(arguments: Seq[String]) extends ScallopConf(arguments) { 16 | 17 | val datasetTypes = DataFactory.dataHolderList 18 | val algorithms = AlgorithmFactory.AlgList 19 | 20 | banner( """ 21 | Spark机器学习算法 22 | ---------------- 23 | 基于Spark的机器学习算法库 24 | 25 | 示例: 26 | spark-submit [Jar] --data Yahoo --dir /zhy/data/Yahoo/ --method ALS 27 | 28 | 参数: 29 | """) 30 | 31 | version("version 1.5.0") 32 | 33 | val data = opt[String](required = true, validate = { str => datasetTypes.map(_.getName).contains(str) }, descr = { 34 | "数据集类型。可选类型: " + datasetTypes.map(_.getName).reduce(_ + ", " + _) 35 | }) 36 | 37 | val dir = opt[String](required = true, descr = "数据集根目录") 38 | 39 | val method = opt[String](required = true, validate = { str => algorithms.map(_.getName).contains(str) }, descr = { 40 | "推荐算法。可选类型: " + algorithms.map(_.getName).reduce(_ + ", " + _) 41 | }) 42 | 43 | } 44 | -------------------------------------------------------------------------------- /sparkML/src/main/factory/AlgTrait.scala: -------------------------------------------------------------------------------- 1 | package main.factory 2 | 3 | import main.util.{MainHolder, SparkEnv} 4 | 5 | /** 6 | * Created by zhy on 2015/8/2 0002. 7 | */ 8 | 9 | /** 10 | * 推荐算法输入数据 11 | */ 12 | trait InputRecData extends Serializable { 13 | protected val sc = SparkEnv.sc 14 | protected val ratings = MainHolder.getDataHolder().getData 15 | MainHolder.getDataHolder().getDataDesc 16 | 17 | //分割数据集为训练集、验证集、测试集 18 | protected val RDD = ratings.randomSplit(Array(0.7, 0.2, 0.1)) 19 | protected val trainData = RDD(0).persist 20 | protected val validateData = RDD(1).persist 21 | protected val testData = RDD(2).persist 22 | protected val numValidation = validateData.count 23 | protected val numTest = testData.count 24 | } 25 | 26 | /** 27 | * LR输入数据 28 | */ 29 | trait InputLRData extends Serializable { 30 | protected val sc = SparkEnv.sc 31 | protected val data = MainHolder.getDataHolder().getLRData 32 | MainHolder.getDataHolder().getDataDesc 33 | 34 | protected val RDD = data.randomSplit(Array(0.8, 0.2)) 35 | protected val trainData = RDD(0).persist 36 | protected val testData = RDD(1).persist 37 | 38 | } 39 | 40 | /** 41 | * 算法度量方式 42 | */ 43 | trait RMSE extends Serializable { 44 | protected var RMSE: Double = Double.MaxValue 45 | 46 | /** 47 | * @return 算法对于指定数据集推荐结果的均方根误差(RMSE) 48 | */ 49 | def getRMSE = println("测试集的RMSE为 " + RMSE + "\n----------测试完毕----------") 50 | } 51 | -------------------------------------------------------------------------------- /sparkML/src/main/input/recommend/YahooDataHolder.scala: -------------------------------------------------------------------------------- 1 | package main.input.recommend 2 | 3 | import main.util.SparkEnv 4 | import org.apache.spark.mllib.recommendation.Rating 5 | import org.apache.spark.rdd.RDD 6 | 7 | /** 8 | * Created by zhy on 2015/7/18 0018. 
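 * Reads ratings from <dataDirectoryPath>/data.txt, where each line is "userID itemID rating"
 * separated by (possibly repeated) whitespace; no itemID-to-name mapping is provided for this dataset.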
9 | */ 10 | 11 | /** 12 | * @param dataDirectoryPath Yahoo数据集根目录 13 | */ 14 | class YahooDataHolder(dataDirectoryPath: String) extends RecDataHolder with Serializable { 15 | override protected val ratings: RDD[Rating] = loadRatingsFromAFile() 16 | override protected val productsIDsToNameMap: Map[Int, String] = loadIDsToProductnameMapFromADirectory(dataDirectoryPath) 17 | 18 | /** 19 | * 从文件中读取Yahoo数据集评分 20 | * @return RDD[Rating] 21 | */ 22 | protected def loadRatingsFromAFile(): RDD[Rating] = { 23 | val ratings = SparkEnv.sc.textFile(dataDirectoryPath + "data.txt") 24 | .filter(line => formatSpace(line).split(" ").length >= 3) 25 | .map { line => 26 | val lineFormat = formatSpace(line) 27 | val fields = lineFormat.split(" ") 28 | (Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble)) 29 | } 30 | ratings 31 | } 32 | 33 | /** 34 | * 去除字符串中多于一个连续的空格 35 | * @param line 输入字符串 36 | * @return 去除多余空格后的字符串 37 | */ 38 | protected def formatSpace(line: String): String = { 39 | line.replaceAll("\\s+", " ") 40 | } 41 | 42 | /** 43 | * 44 | * @param dataDirectoryPath Yahoo数据集根目录 45 | * @return Map:musicID -> musicName 46 | */ 47 | protected def loadIDsToProductnameMapFromADirectory(dataDirectoryPath: String): Map[Int, String] = { 48 | null 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /sparkML/src/main/input/LRDataHolder.scala: -------------------------------------------------------------------------------- 1 | package main.input 2 | 3 | import main.util.SparkEnv 4 | import org.apache.spark.mllib.linalg.SparseVector 5 | import org.apache.spark.mllib.regression.LabeledPoint 6 | import org.apache.spark.rdd.RDD 7 | 8 | import scala.collection.mutable.ArrayBuffer 9 | 10 | /** 11 | * Created by zhy on 2015/8/3 0003. 12 | */ 13 | 14 | /** 15 | * 逻辑回归数据集 16 | * @param dataDirectoryPath 数据集根目录 17 | */ 18 | class LRDataHolder(dataDirectoryPath: String) extends DataHolder with Serializable { 19 | private val data: RDD[LabeledPoint] = loadDataFromFile 20 | private val dimensions = 1000 21 | 22 | def loadDataFromFile: RDD[LabeledPoint] = { 23 | val feature1 = SparkEnv.sc.textFile(dataDirectoryPath + "Features.txt") 24 | val feature2 = SparkEnv.sc.textFile(dataDirectoryPath + "Info.txt") 25 | val data = SparkEnv.sc.textFile(dataDirectoryPath + "data.txt") 26 | .map { line => 27 | var indices = ArrayBuffer[Int]() 28 | var values = ArrayBuffer[Double]() 29 | val fields = line.split(" ") 30 | val label = fields(0).toDouble 31 | fields.foreach { field => 32 | val featureI = field.split(":") 33 | if (featureI.length == 2) { 34 | indices += featureI(0).toInt 35 | values += featureI(1).toDouble 36 | } 37 | } 38 | new LabeledPoint(label, new SparseVector(dimensions, indices.toArray, values.toArray)) 39 | } 40 | data 41 | } 42 | 43 | override def getLRData = data 44 | 45 | override def getData = ??? 46 | 47 | override def getDataDesc = println("数据集包含" + data.count + "条数据") 48 | } 49 | -------------------------------------------------------------------------------- /sparkML/src/main/util/MainHolder.scala: -------------------------------------------------------------------------------- 1 | package main.util 2 | 3 | import main.factory.{Algorithm, AlgorithmFactory} 4 | import main.input.{DataFactory, DataHolder} 5 | 6 | /** 7 | * Created by zhy on 2015/7/19 0019. 
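 * Resolves the --data and --method command-line options against DataFactory.dataHolderList and
 * AlgorithmFactory.AlgList, then keeps the chosen DataHolder and Algorithm for the rest of the run.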
8 | */ 9 | 10 | /** 11 | * 初始化并获取DataHolder和Recommender 12 | */ 13 | object MainHolder { 14 | private var recommender: Option[Algorithm] = None 15 | private var dataHolder: Option[DataHolder] = None 16 | 17 | /** 18 | * 初始化DataHolder数据源和rcommender算法 19 | * @param conf 配置管理类 20 | */ 21 | def setUp(conf: Conf): Unit = { 22 | val dataHolderNameToFactoryMap = DataFactory.dataHolderList.map(holder => holder.getName -> holder).toMap 23 | val dataHolderStr: String = conf.data() 24 | dataHolder = Some(dataHolderNameToFactoryMap.get(dataHolderStr).get.getInstance(conf)) 25 | 26 | val recommenderNameToFactoryMap = AlgorithmFactory.AlgList.map(rec => rec.getName -> rec).toMap 27 | val recommenderStr: String = conf.method() 28 | recommender = Some(recommenderNameToFactoryMap.get(recommenderStr).get.getAlg(conf)) 29 | } 30 | 31 | /** 32 | * 计算该推荐算法对于测试集的均方根误差RMSE 33 | * @return Unit 34 | */ 35 | def calculateRMSE() = getAlgInstance.getRMSE 36 | 37 | /** 38 | * 39 | * @return 机器学习算法实例 40 | */ 41 | def getAlgInstance(): Algorithm = { 42 | recommender match { 43 | case Some(rec) => rec 44 | case None => throw new MainHolderNotInitializedException 45 | } 46 | } 47 | 48 | /** 49 | * 50 | * @return 数据源实例 51 | */ 52 | def getDataHolder(): DataHolder = { 53 | dataHolder match { 54 | case Some(holder) => holder 55 | case None => throw new MainHolderNotInitializedException 56 | } 57 | } 58 | 59 | class MainHolderNotInitializedException extends Exception 60 | 61 | } 62 | -------------------------------------------------------------------------------- /sparkML/src/main/input/DataFactory.scala: -------------------------------------------------------------------------------- 1 | package main.input 2 | 3 | import main.input.recommend.{NetflixDataHolder4Directory, NetflixDataHolder4OneFile, RecDataHolder, YahooDataHolder} 4 | import main.util.Conf 5 | 6 | /** 7 | * Created by zhy on 2015/7/19 0019. 8 | */ 9 | 10 | /** 11 | * 数据集工厂 12 | */ 13 | trait DataFactory { 14 | def getName: String 15 | 16 | def getDesc: String 17 | 18 | def getInstance(conf: Conf): DataHolder 19 | } 20 | 21 | object DataFactory { 22 | val dataHolderList: List[DataFactory] = List(YahooFac, NetFlix2Fac, NetFlix1Fac, LR) 23 | } 24 | 25 | object YahooFac extends DataFactory { 26 | override def getName: String = "Yahoo" 27 | 28 | override def getDesc: String = "数据源:Yahoo数据集,单个文件\n" + 29 | "数据格式:userID itemID(musicID) rating(0-100)" 30 | 31 | override def getInstance(conf: Conf): RecDataHolder = { 32 | println(getDesc) 33 | new YahooDataHolder(conf.dir()) 34 | } 35 | } 36 | 37 | object NetFlix1Fac extends DataFactory { 38 | override def getName: String = "NetFlixInFile" 39 | 40 | override def getDesc: String = "数据源:NetFlix数据集,单个文件\n数据格式:???" 
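  // Single-file layout as parsed by NetflixDataHolder4OneFile: each line is movieID,userID,rating,date (default file name ratings.txt).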
41 | 42 | override def getInstance(conf: Conf): RecDataHolder = { 43 | println(getDesc) 44 | new NetflixDataHolder4OneFile(conf.dir()) 45 | } 46 | } 47 | 48 | object NetFlix2Fac extends DataFactory { 49 | override def getName: String = "NetFlixInDirectory" 50 | 51 | override def getDesc: String = "数据源:NetFlix数据集,目录\n" + 52 | "数据格式:每个文件第一行为UserID,其余每行:movieID,rating(0-5),time" 53 | 54 | override def getInstance(conf: Conf): RecDataHolder = { 55 | println(getDesc) 56 | new NetflixDataHolder4Directory(conf.dir()) 57 | } 58 | } 59 | 60 | object LR extends DataFactory { 61 | override def getName: String = "LR" 62 | 63 | override def getInstance(conf: Conf): DataHolder = { 64 | println(getDesc) 65 | new LRDataHolder(conf.dir()) 66 | } 67 | 68 | override def getDesc: String = "数据源:逻辑回归数据集,单个文件\n" + 69 | "数据格式:每行 label 特征维度1:特征数据1 ...... 特征维度n:特征数据n" 70 | } -------------------------------------------------------------------------------- /sparkML/src/main/optimizer/FTRLProximal.scala: -------------------------------------------------------------------------------- 1 | package main.optimizer 2 | 3 | import breeze.linalg.SparseVector 4 | import breeze.numerics.abs 5 | import org.apache.spark.mllib.regression.LabeledPoint 6 | 7 | /** 8 | * Created by zhy on 2015/8/1 0001. 9 | */ 10 | 11 | /** 12 | * 13 | * @param beta 添加到梯度的协方差矩阵中避免学习速率过高 14 | * @param alpha 初始学习速率 15 | * @param L1 L1正则项权重 16 | * @param L2 L2正则项权重 17 | * @param D 特征向量维度 18 | */ 19 | final class FTRLProximal(val beta: Double = 0.1, val alpha: Double = 0.1, val L1: Double = 0.0, val L2: Double = 0.0, val D: Int = 1000) 20 | extends Optimizer { 21 | 22 | private val N: SparseVector[Double] = SparseVector.zeros(D) 23 | private val Z: SparseVector[Double] = SparseVector.zeros(D) 24 | private var W: SparseVector[Double] = SparseVector.zeros(D) 25 | 26 | def printV = { 27 | println("W向量" + W.toString()) 28 | println("N向量" + N.toString()) 29 | println("Z向量" + Z.toString()) 30 | } 31 | 32 | //迭代函数 33 | def optimize(data: LabeledPoint, initialWeights: SparseVector[Double]): 34 | SparseVector[Double] = { 35 | W = initialWeights 36 | println("optimize函数") 37 | step(data.features.toArray, data.label.toInt) 38 | } 39 | 40 | //迭代过程 41 | //TODO 迭代过程需要优化 42 | def step(feature: Array[Double], label: Int): SparseVector[Double] = { 43 | println("step函数") 44 | var p: Double = 0.0 45 | for (i_double <- feature) { 46 | val i = i_double.toInt 47 | var sign: Int = 0 48 | if (Z(i) < 0) 49 | sign = -1 50 | else 51 | sign = 1 52 | if (abs(Z(i)) <= L1) { 53 | W(i) = 0.0 54 | } else { 55 | W(i) = (sign * L1 - Z(i)) / ((beta + Math.sqrt(N(i))) / alpha + L2) 56 | } 57 | p += W(i) 58 | } 59 | 60 | // predict 61 | p = 1 / (1 + Math.exp(-p)) 62 | 63 | // update 64 | val g: Double = p - label 65 | for (i_double <- feature) { 66 | val i = i_double.toInt 67 | val sigma: Double = (Math.sqrt(N(i) + g * g) - Math.sqrt(N(i))) / alpha 68 | Z(i) += g - sigma * W(i) 69 | N(i) += g * g 70 | } 71 | W 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /sparkML/src/main/factory/AlgorithmFactory.scala: -------------------------------------------------------------------------------- 1 | package main.factory 2 | 3 | import main.classifier.LRWithFTRL 4 | import main.recommender.{ALSRec, Recommender, SlopOneRec} 5 | import main.util.Conf 6 | 7 | /** 8 | * Created by zhy on 2015/7/19 0019. 
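 * Maps the --method option to a concrete Algorithm: each factory object below supplies the
 * command-line name, a description of the algorithm and its parameters, and a configured instance.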
9 | */ 10 | 11 | /** 12 | * 机器学习算法工厂 13 | */ 14 | trait AlgorithmFactory { 15 | def getName: String 16 | 17 | def getAlg(conf: Conf): Algorithm 18 | 19 | def getAlgDes(): String 20 | 21 | def getParamDes(): String 22 | 23 | def getDescription(): String = { 24 | getAlgDes() + "\n参数:\n" + getParamDes() 25 | } 26 | } 27 | 28 | object AlgorithmFactory { 29 | val AlgList: List[AlgorithmFactory] = List(ALS, SlopOne, LRWithFTRL) 30 | } 31 | 32 | object ALS extends AlgorithmFactory { 33 | override def getName: String = "ALS" 34 | 35 | override def getAlgDes(): String = "MLLib中ALS算法" 36 | 37 | protected val rankStr = "rank" 38 | protected val lambdaStr = "λ" 39 | protected val iterStr = "numberOfIterations" 40 | 41 | override def getParamDes(): String = rankStr + " = ,特征向量维度\n" + lambdaStr + " = ,正则化参数\n" + iterStr + " = ,迭代次数" 42 | 43 | override def getAlg(conf: Conf): Recommender = { 44 | val ranks = 12 to 15 45 | val lambdas = List(0.01, 0.05) 46 | val iters = 10 to 20 47 | 48 | println(getDescription) 49 | new ALSRec(ranks, lambdas, iters) 50 | } 51 | } 52 | 53 | object SlopOne extends AlgorithmFactory { 54 | override def getName: String = "Slop-One" 55 | 56 | override def getAlg(conf: Conf): Recommender = { 57 | println(getDescription) 58 | new SlopOneRec 59 | } 60 | 61 | override def getParamDes(): String = "无参数" 62 | 63 | override def getAlgDes(): String = "Slop-One算法" 64 | } 65 | 66 | object LRWithFTRL extends AlgorithmFactory { 67 | override def getName: String = "LR-FTRL" 68 | 69 | protected val numFea = "numFeatures" 70 | 71 | override def getParamDes(): String = numFea + "= ,特征向量维度\n" 72 | 73 | override def getAlgDes(): String = "采用FTRL-Proximal优化的Logistic Regression算法" 74 | 75 | override def getAlg(conf: Conf): Algorithm = { 76 | //TODO 根据数据集特征提供维度 77 | val numFeatures = 1000 78 | 79 | println(getDescription) 80 | new LRWithFTRL(numFeatures) 81 | } 82 | } -------------------------------------------------------------------------------- /sparkML/src/main/Classifier/LRWithFTRL.scala: -------------------------------------------------------------------------------- 1 | package main.classifier 2 | 3 | import breeze.linalg.SparseVector 4 | import breeze.numerics.exp 5 | import main.factory.InputLRData 6 | import main.linalg.AlgUtil 7 | import main.optimizer.FTRLProximal 8 | import org.apache.spark.mllib.linalg.Vector 9 | import org.apache.spark.mllib.regression.LabeledPoint 10 | import org.apache.spark.rdd.RDD 11 | 12 | import scala.collection.mutable.ArrayBuffer 13 | 14 | /** 15 | * Created by zhy on 2015/8/2 0002. 
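 * Online logistic regression: the weight vector is updated one example at a time by FTRLProximal,
 * and the test split is scored progressively (each example is predicted first, then trained on);
 * the inherited RMSE field is reused here to store the resulting classification accuracy.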
16 | */ 17 | 18 | /** 19 | * Logistic Regression逻辑回归模型 20 | */ 21 | final class LRWithFTRL(val numFeatures: Int) 22 | extends RegressionModel with InputLRData with Serializable { 23 | 24 | //初始化特征向量 25 | private var weights: SparseVector[Double] = SparseVector.zeros(numFeatures) 26 | 27 | //设定优化算法 28 | override val optimizer = new FTRLProximal(D = numFeatures) 29 | 30 | train(trainData) 31 | predictAccuracy(testData) 32 | 33 | def train(data: LabeledPoint): Unit = { 34 | weights = optimizer.optimize(data, weights) 35 | optimizer.printV 36 | } 37 | 38 | //训练参数 39 | //TODO 训练和测试过程并行化 40 | override def train(trainData: RDD[LabeledPoint]): Unit = { 41 | val localTrainData = trainData.toLocalIterator 42 | localTrainData.foreach(data => train(data)) 43 | } 44 | 45 | /** 46 | * 分类预测准确率 47 | * @param testData 测试数据集合 48 | * @return 准确率 49 | */ 50 | def predictAccuracy(testData: RDD[LabeledPoint]): Unit = { 51 | var predictions = new ArrayBuffer[Tuple2[Double,Double]]() 52 | testData.toLocalIterator.foreach{ data => 53 | val prediction = (data.label, predict(data.features)) 54 | train(data) 55 | predictions += prediction 56 | } 57 | val numData:Int = predictions.toArray.length 58 | val numCorrect:Int = predictions.toArray.filter{data=> 59 | data._1 == data._2 60 | }.length 61 | println("正确预测的数量: " + numCorrect + 62 | "\n所有数量: " + numData ) 63 | RMSE = numCorrect * 1.0 / numData 64 | } 65 | 66 | /** 67 | * 根据假设函数 预测单个样本 68 | * @param testData 测试样本数据 69 | * @return 分类数据: 1 or 0 70 | */ 71 | def predict(testData: Vector): Double = { 72 | val x: Double = weights.dot(AlgUtil.VtoB(testData)) 73 | val prob: Double = sigmod(x) 74 | if (prob > 0.5) return 1.0 75 | else return 0.0 76 | } 77 | 78 | override def getRMSE = 79 | println("使用FTRL-Proximal的逻辑回归在测试集上的预测准确率为" + RMSE + "\n----------测试完毕----------") 80 | 81 | //sigmod函数 82 | private def sigmod(x: Double): Double = 1.0 / (1 + exp(-x)) 83 | } 84 | -------------------------------------------------------------------------------- /sparkML/src/main/recommender/ALSRec.scala: -------------------------------------------------------------------------------- 1 | package main.recommender 2 | 3 | import main.factory.InputRecData 4 | import org.apache.spark.mllib.recommendation.{ALS, MatrixFactorizationModel, Rating} 5 | import org.apache.spark.rdd.RDD 6 | 7 | /** 8 | * Created by zhy on 2015/7/19 0019. 
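 * Matrix-factorization recommender built on MLlib ALS. getBestModel currently trains a single model
 * with rank = 12, 20 iterations and lambda = 0.05; the private train(numValidation) grid search over
 * ranks, lambdas and numIters is kept below but is not called.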
9 | */ 10 | 11 | final class ALSRec(ranks: Range, lambdas: List[Double], numIters: Range) 12 | extends Recommender with InputRecData with Serializable { 13 | //训练模型并测试 14 | val model = getBestModel() 15 | test 16 | 17 | 18 | /** 19 | * 20 | * @return 获取参数最佳的模型 21 | */ 22 | private def getBestModel(): Option[MatrixFactorizationModel] = { 23 | Some(ALS.train(trainData,12,20,0.05)) 24 | } 25 | 26 | /** 27 | * 使用测试集进行测试 28 | */ 29 | private def test = { 30 | RMSE = calculateRmse(model.get, testData, numTest) 31 | } 32 | 33 | /** 34 | * 计算rmse均方根误差 35 | * @param model 算法模型 36 | * @param dataset 数据集 37 | * @param n 数据集大小 38 | * @return 该算法模型在该验证数据集上的RMSE 39 | */ 40 | private def calculateRmse(model: MatrixFactorizationModel, dataset: RDD[Rating], n: Long): Double = { 41 | val predictions: RDD[Rating] = model.predict(dataset.map(x => (x.user, x.product))) 42 | val predictionsAndRatings = predictions.map { x => 43 | ((x.user, x.product), x.rating) 44 | }.join(dataset.map { x => 45 | ((x.user, x.product), x.rating) 46 | } 47 | ).values 48 | val tmp_RMSE = math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).reduce(_ + _) / n) 49 | println("计算得到的RMSE为: " + tmp_RMSE) 50 | tmp_RMSE 51 | } 52 | 53 | /** 54 | * 训练模型 55 | * @param numValidation 验证集大小 56 | * @return 训练完成的模型 57 | */ 58 | private def train(numValidation: Long): Option[MatrixFactorizationModel] = { 59 | RMSE = Double.MaxValue 60 | var bestModel: Option[MatrixFactorizationModel] = None 61 | var bestRank = 0 62 | var bestLambda = -1.0 63 | var bestNumIter = -1 64 | 65 | for (rank <- ranks; lambda <- lambdas; numIter <- numIters) { 66 | val model = ALS.train(trainData, rank, numIter, lambda) 67 | val validataionRmse = calculateRmse(model, validateData, numValidation) 68 | if (validataionRmse < RMSE) { 69 | bestModel = Some(model) 70 | RMSE = validataionRmse 71 | bestRank = rank 72 | bestLambda = lambda 73 | bestNumIter = numIter 74 | } 75 | } 76 | println("模型训练完毕。最优参数为: (rank = " + bestRank + "; numIter = " + 77 | bestNumIter + "; lambda = " + bestLambda + ")") 78 | 79 | bestModel 80 | } 81 | } -------------------------------------------------------------------------------- /bash/splitDataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.3 2 | # coding: UTF-8 3 | #Author :zhy 4 | 5 | import os,sys,shutil 6 | import random 7 | from subprocess import call 8 | 9 | def chooseFile(): 10 | path = sys.argv[1] 11 | percentage = (float)(sys.argv[2]) 12 | fileList = os.listdir(path) 13 | fileNum = (int)(len(fileList) * percentage) 14 | fileNum = [1,fileNum][fileNum >= 1] 15 | chosenFile = random.sample(fileList,fileNum) 16 | print(chosenFile) 17 | return chosenFile 18 | 19 | def splitDatasetToDirectory(chosenFile): 20 | """将NetFlix数据集目录下的小文件随机选择10%复制到另外一个文件夹""" 21 | path = sys.argv[1] 22 | for file in chosenFile: 23 | if sys.platform.__eq__("win32"): 24 | desPath = "c:/Users/zhy/Documents/study/AD.SE/courseDesign/DatasetInHDFS/NetFlix/little/" 25 | shutil.copy(path + file, desPath) 26 | else: 27 | call(["cp", path + file, path + "../little/training_set/"]) 28 | 29 | def splitDatasetToHDFS(chosenFile): 30 | """将NetFlix数据集目录下的小文件随机选择10%上传至HDFS中以测试使用""" 31 | deleteOldData() 32 | path = sys.argv[1] 33 | for file in chosenFile: 34 | call(["hdfs","dfs","-put",path + file,"/zhy/data/NetFlix/little/training_set/"]) 35 | 36 | def deleteOldData(): 37 | """删除HDFS中原有的数据集""" 38 | call(["hdfs","dfs","-rm","-R","/zhy/data/NetFlix/little/training_set/"]) 39 | 
call(["hdfs","dfs","-mkdir","/zhy/data/NetFlix/little/training_set/"]) 40 | 41 | def inputParm(): 42 | if len(sys.argv) < 3: 43 | print("命令格式:./splitDataset.py [本地数据集路径] [数据集选择比例]") 44 | sys.exit(1) 45 | trigger = True 46 | while trigger: 47 | try: 48 | trigger = False 49 | print("1 -> 抽取 “" + sys.argv[1] + 50 | "” 目录下10%的文件并复制到“../little/training_set/”目录下\n") 51 | print("2 -> 抽取 “" + sys.argv[1] + 52 | "” 目录下10%的文件并上传到HDFS中“zhy/data/NetFlix/little/training_set/”目录下\n") 53 | print("3 -> 运行Spark程序\n") 54 | print("4 -> 同时执行'2'+'3'的操作\n") 55 | param = int(input('请输入选择的操作 ... \n')) 56 | return param 57 | 58 | except ValueError: 59 | trigger = True 60 | print("输入不合法,请输入一个数字 ... ") 61 | 62 | def execute(): 63 | DataSet = input('请输入数据集选项:\n') 64 | Dir = input('请输入数据集位置:\n') 65 | Alg = input('请输入算法类型:\n') 66 | call(["spark-submit","/home/zhy/spark-app/zhy/sparkML.jar","--data",DataSet,"--dir",Dir,"--method",Alg]) 67 | 68 | if __name__ == '__main__': 69 | param = inputParm() 70 | if param == 1: 71 | splitDatasetToDirectory(chooseFile()) 72 | elif param == 2: 73 | splitDatasetToHDFS(chooseFile()) 74 | elif param == 3: 75 | execute() 76 | elif param == 4: 77 | splitDatasetToHDFS(chooseFile()) 78 | execute() 79 | else: 80 | print("未知选项,退出程序") 81 | sys.exit(1) 82 | 83 | -------------------------------------------------------------------------------- /sparkML/src/main/input/recommend/NetflixDataHolder.scala: -------------------------------------------------------------------------------- 1 | package main.input.recommend 2 | 3 | /** 4 | * Created by zhy on 2015/7/18 0018. 5 | */ 6 | 7 | import main.util.SparkEnv 8 | import org.apache.hadoop.conf.Configuration 9 | import org.apache.hadoop.fs.{FileSystem, Path} 10 | import org.apache.spark.mllib.recommendation.Rating 11 | import org.apache.spark.rdd.RDD 12 | 13 | import scala.collection.mutable.ArrayBuffer 14 | ; 15 | 16 | /** 17 | * @param dataDirectoryPath NetFlix数据集根目录 18 | */ 19 | abstract class NetflixDataHolder(dataDirectoryPath: String) extends RecDataHolder { 20 | protected val productsIDsToNameMap = loadIDsToProductnameMapFromADirectory() 21 | 22 | /** 23 | * 从 "movie_titles.txt" 中获取电影名和ID的映射 24 | * @return Map: movieID -> title 25 | */ 26 | protected def loadIDsToProductnameMapFromADirectory(): Map[Int, String] = { 27 | val sc = SparkEnv.sc 28 | val movies = sc.textFile(dataDirectoryPath + "movie_titles.txt").map { line => 29 | val fields = line.split(",") 30 | // format: (movieID, movieName) 31 | (fields(0).toInt, fields(2) + " (" + fields(1) + ")") 32 | }.collect.toMap 33 | movies 34 | } 35 | } 36 | 37 | /** 38 | * 从一个文件读取NetFilx数据 文件格式: movieID>,userID,rating,date. 39 | * @param dataDirectoryPath NetFlix数据集目录 40 | * @param filename 文件名 41 | */ 42 | class NetflixDataHolder4OneFile(dataDirectoryPath: String, filename: String = "ratings.txt") extends NetflixDataHolder(dataDirectoryPath) with Serializable { 43 | protected val ratings = { 44 | val sc = SparkEnv.sc 45 | val ratingsRDD = sc.textFile(dataDirectoryPath + filename).map { 46 | line => val fields = line.split(",") 47 | (Rating(fields(1).toInt, fields(0).toInt, fields(2).toDouble)) 48 | } 49 | ratingsRDD 50 | } 51 | } 52 | 53 | /** 54 | * 从一个目录下所有文件读取NetFilx数据 文件格式: movieID>,userID,rating,date. 
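 * Each file under training_set/ begins with a "movieID:" header line, followed by lines of the form
 * userID,rating,date (see loadRatingsFromOneFile below).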
55 | * @param dataDirectoryPath NetFlix数据集目录 56 | */ 57 | class NetflixDataHolder4Directory(dataDirectoryPath: String) extends NetflixDataHolder(dataDirectoryPath) with Serializable { 58 | protected val ratings = loadRatingsFromADirectory() 59 | 60 | protected def loadRatingsFromADirectory(): RDD[Rating] = { 61 | val conf = new Configuration() 62 | val hdfs = FileSystem.get(conf) 63 | val dataPath = new Path(dataDirectoryPath + "training_set") 64 | val stats = hdfs.listStatus(dataPath) 65 | var fileList = new ArrayBuffer[String] 66 | 67 | for (stat <- stats) fileList += stat.getPath.toString 68 | val ratingsRDDsArray = fileList.map(filePath => loadRatingsFromOneFile(filePath)) 69 | val ratings = SparkEnv.sc.union(ratingsRDDsArray) 70 | ratings.persist.coalesce(77) 71 | } 72 | 73 | protected def loadRatingsFromOneFile(absoluteFilePath: String): RDD[Rating] = { 74 | val ratingsTxtRDD = SparkEnv.sc.textFile(absoluteFilePath) 75 | val movieIDLine = ratingsTxtRDD.first() 76 | val movieID = movieIDLine.split(":")(0).toInt 77 | 78 | val ratingsRDD = ratingsTxtRDD.map(line => if (line == movieIDLine) { 79 | Rating(-1, -1, -1) 80 | } else { 81 | val fields = line.split(",") 82 | (Rating(fields(0).toInt, movieID, fields(1).toDouble)) 83 | }) 84 | ratingsRDD.filter(rat => rat.user >= 0) 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /sparkML/src/main/recommender/SlopOneRec.scala: -------------------------------------------------------------------------------- 1 | package main.recommender 2 | 3 | import main.factory.InputRecData 4 | import org.apache.spark.mllib.recommendation.Rating 5 | import org.apache.spark.rdd.RDD 6 | 7 | import scala.collection.mutable.ArrayBuffer 8 | 9 | /** 10 | * Created by zhy on 2015/7/26 0026. 
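 * Slope One: the rating of item i for user u is predicted as the average of (deviation(i, j) + r_uj)
 * over the other items j rated by u, where deviation(i, j) is the mean rating difference between
 * items i and j across the users who rated both.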
11 | */ 12 | final class SlopOneRec extends Recommender with InputRecData with Serializable { 13 | val trainDataGroupByUser = trainData.map(rating => (rating.user, (rating.product, rating.rating))) 14 | .groupByKey.persist 15 | test 16 | 17 | 18 | /** 19 | * Evaluates the recommender on the test set 20 | */ 21 | private def test = { 22 | RMSE = calculateRmse(testData, numTest) 23 | } 24 | 25 | def numUserConsumer_ij(product_i: Int, product_j: Int): Long = { 26 | trainDataGroupByUser.filter { trainData4one => 27 | trainData4one._2.exists(a => a._1 == product_i) && trainData4one._2.exists(b => b._1 == product_j) 28 | }.count() 29 | } 30 | 31 | /** 32 | * 33 | * @param u user ID 34 | * @param i item ID 35 | * @return predicted rating triple (user, item, score) 36 | */ 37 | def predict(u: Int, i: Int): Rating = { 38 | //(projectID,Ratings) 39 | val S_u = { 40 | val ratings = trainDataGroupByUser.lookup(u) 41 | if (ratings.length <= 0) throw new UserNotFoundException 42 | ratings(0).toIterator 43 | } 44 | var prediction: Double = 0 45 | var S_u_minus_i: Double = 0 46 | S_u.foreach { S_uj => 47 | if (S_uj._1 == i) return new Rating(u, i, S_uj._2) 48 | val deviation_ij = calcuDeviation_ij(i, S_uj._1) 49 | val r_uj = S_uj._2 50 | S_u_minus_i += 1 51 | prediction += (deviation_ij + r_uj) 52 | } 53 | new Rating(u, i, prediction / S_u_minus_i) //average over the S_u_minus_i items actually used 54 | } 55 | 56 | 57 | private def calculateRmse(dataset: RDD[Rating], n: Long): Double = { 58 | println("开始计算RMSE") 59 | var predictions = ArrayBuffer[Rating]() 60 | val train = dataset.toLocalIterator 61 | train.foreach { x => 62 | println("预测一个样本的评分") 63 | predictions += predict(x.user, x.product) 64 | } 65 | val predictionsRDD: RDD[Rating] = sc.parallelize(predictions.toSeq) 66 | val predictionsAndRatings = predictionsRDD.map { x => 67 | ((x.user, x.product), x.rating) 68 | }.join(dataset.map { x => 69 | ((x.user, x.product), x.rating) 70 | } 71 | ).values 72 | val tmp_RMSE = math.sqrt(predictionsAndRatings.map(x => (x._1 - x._2) * (x._1 - x._2)).reduce(_ + _) / n) 73 | println("计算得到的RMSE为: " + tmp_RMSE) 74 | tmp_RMSE 75 | } 76 | 77 | /** 78 | * 79 | * @param product_i item i 80 | * @param product_j item j 81 | * @return average rating deviation between items i and j 82 | */ 83 | private def calcuDeviation_ij(product_i: Int, product_j: Int): Double = { 84 | //ratings for item i and item j, keyed by user and joined over the users who rated both 85 | val ratings4i = trainData.filter(rating => rating.product == product_i).map(rating => (rating.user, rating.rating)) 86 | val ratings4j = trainData.filter(rating => rating.product == product_j).map(rating => (rating.user, rating.rating)) 87 | val ratingPairs4ij = ratings4i.join(ratings4j) 88 | val numUser4ij = ratingPairs4ij.count() 89 | if (numUser4ij == 0) return 0 90 | val deviation_ij = ratingPairs4ij.values.map { case (rating_ui, rating_uj) => rating_ui - rating_uj }.reduce(_ + _) 91 | deviation_ij / numUser4ij 92 | } 93 | 94 | class UserNotFoundException extends Exception 95 | 96 | } 97 | --------------------------------------------------------------------------------