├── .travis.yml
├── project
│   ├── build.properties
│   └── plugins.sbt
├── .gitignore
├── src
│   ├── test
│   │   └── scala
│   │       └── me
│   │           └── soulmachine
│   │               └── spark
│   │                   └── WordCountTest.scala
│   └── main
│       └── scala
│           └── me
│               └── soulmachine
│                   └── spark
│                       └── WordCount.scala
├── README.md
├── scalastyle-config.xml
└── LICENSE
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: scala
2 | scala:
3 | - 2.11.7
4 | jdk:
5 | - oraclejdk7
6 | - openjdk7
7 |
--------------------------------------------------------------------------------
/project/build.properties:
--------------------------------------------------------------------------------
1 | # This file should only contain the version of sbt to use.
2 | sbt.version=0.13.7
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.class
3 | *.log
4 | *.pyc
5 | sbt/*.jar
6 | project/*.lock
7 |
8 | # sbt specific
9 | .cache/
10 | .history/
11 | .lib/
12 | dist/*
13 | target/
14 | lib_managed/
15 | src_managed/
16 | project/boot/
17 | project/plugins/project/
18 |
19 | .idea/
20 | .idea_modules/
21 | out/
22 | *.iml
23 |
--------------------------------------------------------------------------------
/project/plugins.sbt:
--------------------------------------------------------------------------------
1 | // You may use this file to add plugin dependencies for sbt.
2 |
3 | resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"
4 |
5 | // scapegoat: static analysis compiler plugin
6 | addSbtPlugin("com.sksamuel.scapegoat" %% "sbt-scapegoat" % "1.0.0")
7 |
8 | // scalastyle: coding style check and enforcer
9 | addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.7.0")
10 | // linter: compiler plugin. NOTE(review): declared under project/, so it presumably applies to the build definition only; confirm intent
11 | addCompilerPlugin("org.psywerx.hairyfotr" %% "linter" % "0.1.12")
12 | // sbt-idea: presumably generates IntelliJ IDEA project files (TODO confirm)
13 | addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.6.0")
14 |
--------------------------------------------------------------------------------
/src/test/scala/me/soulmachine/spark/WordCountTest.scala:
--------------------------------------------------------------------------------
1 | package me.soulmachine.spark
2 |
3 | import java.io.File
4 | import java.nio.charset.StandardCharsets
5 | import java.nio.file.{Paths, Files}
6 |
7 | import org.scalatest._
8 |
9 | import scala.io.Source
10 |
11 |
12 | /**
13 |  * End-to-end test for [[WordCount]]: writes a small input file into a fresh temporary
14 |  * directory, runs the job against a local master and checks the text written to the
15 |  * first output part file.
16 |  */
17 | class WordCountTest extends FlatSpec with Matchers {
18 |
19 |   /**
20 |    * Deletes `file`; if it is a directory, its contents are deleted first.
21 |    *
22 |    * Best effort: the result of `delete()` is ignored so that a cleanup problem never
23 |    * masks the outcome of the test itself.
24 |    *
25 |    * @param file the file or directory to remove
26 |    */
27 |   private def deleteRecursively(file: File): Unit = {
28 |     // listFiles() returns null for anything that is not a readable directory.
29 |     Option(file.listFiles()).foreach(_.foreach(deleteRecursively))
30 |     file.delete()
31 |   }
32 |
33 |   "A WordCount job" should "count words correctly" in {
34 |     val tempDir = Files.createTempDirectory("wordcount-test").toAbsolutePath.toFile
35 |
36 |     // try/finally guarantees the temporary directory is removed even when the job or
37 |     // the assertion fails, and avoids shelling out to a platform-specific `rm -rf`.
38 |     try {
39 |       val inputFile = new File(tempDir, "input.txt")
40 |       Files.write(Paths.get(inputFile.getAbsolutePath),
41 |         "hack hack hack and hack".getBytes(StandardCharsets.UTF_8))
42 |
43 |       val outputDir = new File(tempDir, "output").getAbsolutePath
44 |
45 |       WordCount.execute(
46 |         master = Some("local"),
47 |         input = inputFile.getAbsolutePath,
48 |         output = outputDir
49 |       )
50 |
51 |       // Close the reader explicitly; an open handle can also block directory deletion.
52 |       val source = Source.fromFile(new File(outputDir, "part-00000"), "UTF-8")
53 |       val actual = try source.mkString finally source.close()
54 |
55 |       assert(actual === "(hack,4)\n(and,1)\n")
56 |     } finally {
57 |       deleteRecursively(tempDir)
58 |     }
59 |   }
60 | }
61 |
--------------------------------------------------------------------------------
/src/main/scala/me/soulmachine/spark/WordCount.scala:
--------------------------------------------------------------------------------
1 | package me.soulmachine.spark
2 |
3 | import org.apache.spark._
4 |
5 | object WordCount {
6 | def main(args: Array[String]) {
7 | if (args.length != 2) {
8 | System.err.println("Usage: SparkWordCount