├── project ├── build.properties ├── build.sbt └── plugins.sbt ├── version.sbt ├── examples ├── fatjar │ ├── project │ │ └── plugins.sbt │ ├── README.md │ ├── src │ │ └── main │ │ │ └── scala │ │ │ └── FatJar.scala │ └── build.sbt ├── scoobding │ ├── src │ │ └── main │ │ │ ├── resources │ │ │ └── images │ │ │ │ ├── clock-icon.png │ │ │ │ ├── folder-icon.png │ │ │ │ └── stats-icon.png │ │ │ └── scala │ │ │ └── application │ │ │ ├── gui │ │ │ ├── EventSourceAction.scala │ │ │ ├── LeftMenuBar.scala │ │ │ ├── TablePanel.scala │ │ │ ├── Images.scala │ │ │ ├── ScrollableComponent.scala │ │ │ ├── ActionMenuItem.scala │ │ │ ├── LabeledFieldPanel.scala │ │ │ ├── WaitCursor.scala │ │ │ ├── LabeledComboBoxPanel.scala │ │ │ ├── PositionedBorderPanel.scala │ │ │ ├── BackgroundAction.scala │ │ │ ├── OpenFileMenuItem.scala │ │ │ ├── OpenFilesMenuItem.scala │ │ │ └── StatusPanel.scala │ │ │ ├── reactive │ │ │ ├── Trigger.scala │ │ │ ├── Signalx.scala │ │ │ ├── FilePoller.scala │ │ │ └── EventStreamSourceProxy.scala │ │ │ ├── units │ │ │ ├── CountUnit.scala │ │ │ ├── PercentUnit.scala │ │ │ ├── MemoryUnit.scala │ │ │ ├── TimeUnit.scala │ │ │ ├── MeasureUnit.scala │ │ │ └── Quantity.scala │ │ │ ├── report │ │ │ └── Report.scala │ │ │ ├── measure │ │ │ ├── StringKey.scala │ │ │ ├── Measurement.scala │ │ │ └── RangeKey.scala │ │ │ ├── time │ │ │ ├── Timestamped.scala │ │ │ └── package.scala │ │ │ ├── app │ │ │ ├── BarsPanel.scala │ │ │ └── ReportPanel.scala │ │ │ └── text │ │ │ ├── Text.scala │ │ │ └── PrettyPrinter.scala │ ├── build.sbt │ └── scoobding.iml ├── avro │ ├── project │ │ └── plugins.sbt │ ├── src │ │ └── main │ │ │ └── avro │ │ │ └── weather.avsc │ ├── README.md │ └── build.sbt ├── pageRank │ └── build.sbt ├── README.md └── wordCount │ └── build.sbt ├── notes ├── 0.8.3.markdown ├── about.markdown ├── 0.6.2.markdown ├── 0.8.2.markdown ├── 0.9.1.markdown ├── 0.8.1.markdown ├── header.txt ├── 0.8.4.markdown ├── 0.7.2.markdown ├── 0.9.0.markdown ├── 0.7.3.markdown ├── 
0.8.5.markdown ├── 0.4.0.markdown ├── 0.2.0.markdown └── 0.8.0.markdown ├── src ├── test │ ├── thrift │ │ ├── test.thrift │ │ └── build │ ├── avro │ │ └── SampleSchema.avsc │ ├── scala │ │ └── com │ │ │ └── nicta │ │ │ └── scoobi │ │ │ ├── testing │ │ │ ├── UnitSpecification.scala │ │ │ ├── mutable │ │ │ │ ├── script │ │ │ │ │ └── NictaHadoop.scala │ │ │ │ ├── UnitSpecification.scala │ │ │ │ └── NictaHadoop.scala │ │ │ ├── script │ │ │ │ ├── UnitSpecification.scala │ │ │ │ └── NictaHadoop.scala │ │ │ ├── TempFilesSpec.scala │ │ │ └── NictaHadoop.scala │ │ │ ├── guide │ │ │ ├── Index.scala │ │ │ ├── ScoobiPage.scala │ │ │ ├── ScoobiDevelopment.scala │ │ │ └── Advanced.scala │ │ │ ├── core │ │ │ ├── GroupingSpec.scala │ │ │ └── CheckpointSpec.scala │ │ │ ├── impl │ │ │ ├── plan │ │ │ │ └── mscr │ │ │ │ │ └── MscrAttributes.scala │ │ │ ├── mapreducer │ │ │ │ └── ChannelOutputFormatSpec.scala │ │ │ ├── collection │ │ │ │ └── MapsSpec.scala │ │ │ ├── util │ │ │ │ ├── SerialiserSpec.scala │ │ │ │ └── DistCacheSpec.scala │ │ │ ├── control │ │ │ │ └── FunctionsSpec.scala │ │ │ └── reflect │ │ │ │ └── ClassesSpec.scala │ │ │ ├── io │ │ │ ├── NullDataOutput.scala │ │ │ ├── text │ │ │ │ └── TextOutputSpec.scala │ │ │ └── thrift │ │ │ │ └── ThriftSchemaSpec.scala │ │ │ ├── acceptance │ │ │ ├── NumberPartitionerSpec.scala │ │ │ ├── WordCountSpec.scala │ │ │ ├── TextFileSpec.scala │ │ │ ├── BoundedFilterSpec.scala │ │ │ └── RandomDListsSpec.scala │ │ │ ├── DependenciesSpec.scala │ │ │ └── application │ │ │ └── InMemoryModeSpec.scala │ └── resources │ │ └── css │ │ └── specs2-user.css └── main │ ├── java │ └── com │ │ └── nicta │ │ └── scoobi │ │ └── testavroschema │ │ └── Sha1.java │ ├── scala │ └── com │ │ └── nicta │ │ └── scoobi │ │ ├── application │ │ ├── Levels.scala │ │ ├── Application.scala │ │ ├── Orderings.scala │ │ ├── Cluster.scala │ │ ├── LocalHadoop.scala │ │ └── ScoobiCommandLineArgs.scala │ │ ├── impl │ │ ├── rtt │ │ │ ├── RuntimeClass.scala │ │ │ ├── 
TaggedValue.scala │ │ │ ├── TaggedPartitioner.scala │ │ │ └── ScoobiWritable.scala │ │ ├── util │ │ │ └── Pretty.scala │ │ ├── control │ │ │ ├── SystemProperties.scala │ │ │ ├── Functions.scala │ │ │ └── ImplicitParameters.scala │ │ ├── collection │ │ │ └── Maps.scala │ │ ├── text │ │ │ └── Showx.scala │ │ ├── Persister.scala │ │ ├── mapreducer │ │ │ └── VectorEmitterWriter.scala │ │ └── reflect │ │ │ └── ClasspathDiagnostics.scala │ │ ├── core │ │ ├── UniqueInt.scala │ │ ├── Environment.scala │ │ ├── DObject.scala │ │ └── InputOutputConverter.scala │ │ ├── testing │ │ ├── mutable │ │ │ └── HadoopSpecification.scala │ │ ├── UploadedLibJars.scala │ │ ├── HadoopSpecification.scala │ │ └── SimpleJobs.scala │ │ ├── io │ │ ├── thrift │ │ │ ├── package.scala │ │ │ └── ThriftSerialiser.scala │ │ └── text │ │ │ └── TextSource.scala │ │ └── Scoobi.scala │ ├── scala-2.10 │ └── com │ │ └── nicta │ │ └── scoobi │ │ ├── reflect │ │ └── internal │ │ │ └── util.scala │ │ └── application │ │ └── ILoopCompat.scala │ ├── scala-2.11 │ └── com │ │ └── nicta │ │ └── scoobi │ │ ├── reflect │ │ └── internal │ │ │ └── util.scala │ │ └── application │ │ └── ILoopCompat.scala │ ├── ls │ ├── 0.4.0.json │ ├── 0.7.0.json │ ├── 0.8.4-cdh4.json │ ├── 0.8.5-cdh4.json │ ├── 0.6.0-cdh4.json │ ├── 0.9.1-cdh4.json │ ├── 0.8.0-cdh4.json │ ├── 0.8.2-cdh4.json │ ├── 0.7.0-cdh3.json │ ├── 0.8.0-cdh3.json │ └── 0.7.0-cdh4.json │ └── bin │ └── scoobi ├── bin ├── ci-unit ├── ci-acceptance ├── ci-hadoop2 ├── ci-cluster ├── ci-repl-snapshot └── ci-release-oss ├── .gitignore ├── .travis.yml └── NOTICE.txt /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.7 2 | -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | 2 | version in ThisBuild := "0.9.2" 3 | 
-------------------------------------------------------------------------------- /examples/fatjar/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.0") -------------------------------------------------------------------------------- /notes/0.8.3.markdown: -------------------------------------------------------------------------------- 1 | Maintenance version. Dependency fix: using shapeless for Scala 2.10.3 2 | -------------------------------------------------------------------------------- /project/build.sbt: -------------------------------------------------------------------------------- 1 | scalacOptions ++= Seq("-deprecation", "-unchecked", "-feature", "-language:_") -------------------------------------------------------------------------------- /src/test/thrift/test.thrift: -------------------------------------------------------------------------------- 1 | namespace java com.nicta.scoobi.io.thrift 2 | 3 | struct MyThrift { 4 | 1: string entity; 5 | } -------------------------------------------------------------------------------- /examples/scoobding/src/main/resources/images/clock-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NICTA/scoobi/HEAD/examples/scoobding/src/main/resources/images/clock-icon.png -------------------------------------------------------------------------------- /examples/scoobding/src/main/resources/images/folder-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NICTA/scoobi/HEAD/examples/scoobding/src/main/resources/images/folder-icon.png -------------------------------------------------------------------------------- /examples/scoobding/src/main/resources/images/stats-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NICTA/scoobi/HEAD/examples/scoobding/src/main/resources/images/stats-icon.png -------------------------------------------------------------------------------- /examples/avro/project/plugins.sbt: -------------------------------------------------------------------------------- 1 | resolvers += "sbt-plugin-releases" at "http://repo.scala-sbt.org/scalasbt/sbt-plugin-releases" 2 | 3 | addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2") 4 | -------------------------------------------------------------------------------- /notes/about.markdown: -------------------------------------------------------------------------------- 1 | ***scoobi*** is a productivity library for writing Hadoop jobs in Scala. 2 | 3 | For more information visit: [http://nicta.github.com/scoobi](http://nicta.github.com/scoobi). -------------------------------------------------------------------------------- /notes/0.6.2.markdown: -------------------------------------------------------------------------------- 1 | Published a [new jar for CDH4](https://oss.sonatype.org/content/repositories/releases/com/nicta/scoobi_2.9.2/0.6.2-cdh4/) with no more SNAPSHOT Avro dependencies 2 | 3 | ------ 4 | -------------------------------------------------------------------------------- /notes/0.8.2.markdown: -------------------------------------------------------------------------------- 1 | Maintenance version 2 | 3 | ### Fixes 4 | 5 | * moved the partitioned files to the right place for inmemory execution [#320](http://github.com/NICTA/scoobi/issues/320) 6 | * updated specs2 in scoobi to 2.3.10 7 | 8 | ------ 9 | -------------------------------------------------------------------------------- /notes/0.9.1.markdown: -------------------------------------------------------------------------------- 1 | Maintenance version 2 | 3 | ### Fixes 4 | 5 | * configureCompression only creates compressor once (fixes compression problems with Deflate and Snappy) by @jbeynon 
[#352](http://github.com/NICTA/scoobi/issues/352) 6 | 7 | ------ 8 | -------------------------------------------------------------------------------- /bin/ci-unit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # CI unit build: clean, update, then run test-only with the arguments "xonly console junitxml include unit". 3 | JVM_OPTS="-Dfile.encoding=UTF8 -XX:MaxPermSize=512m -Xms512m -Xmx2g -XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC"; export JVM_OPTS 4 | 5 | ./sbt -Dsbt.log.noformat=true "; clean; update; test-only -- xonly console junitxml include unit" 6 | -------------------------------------------------------------------------------- /examples/fatjar/README.md: -------------------------------------------------------------------------------- 1 | FatJar Example 2 | ============== 3 | 4 | See the UserGuide (deployment in particular) for information on how this works. But the short of it is `sbt assembly` will generate a fatjar, and `hadoop jar target/FatJarExample-assembly-1.0.jar` will run it 5 | -------------------------------------------------------------------------------- /src/test/thrift/build: -------------------------------------------------------------------------------- 1 | #!/bin/sh -eu 2 | 3 | # This requires a copy of thrift to be installed before it can be run 4 | # The expected files have been checked in for convenience 5 | 6 | DIR=$(dirname $0)/../../.. 
7 | thrift -r -out ${DIR}/src/test/java/ --gen java ${DIR}/src/test/thrift/test.thrift -------------------------------------------------------------------------------- /examples/avro/src/main/avro/weather.avsc: -------------------------------------------------------------------------------- 1 | {"type": "record", "name": "test.AvroWeather", 2 | "doc": "A weather reading.", 3 | "fields": [ 4 | {"name": "station", "type": "string", "order": "ignore"}, 5 | {"name": "time", "type": "long"}, 6 | {"name": "temp", "type": "int"} 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | project/boot/ 3 | *.jar 4 | *.iml 5 | examples/*.iml 6 | src/main/*.iml 7 | .idea/ 8 | .idea_modules/ 9 | 10 | .jvmopts 11 | .sbtopts 12 | pgp.sbt 13 | .history 14 | .cache 15 | .project 16 | .classpath 17 | .settings 18 | examples/wordCount/word-count-results/ 19 | .DS_Store 20 | -------------------------------------------------------------------------------- /bin/ci-acceptance: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # CI acceptance build: clean, update, then run test-only with "-include hadoop" and "-exclude MatrixMultiplication". 3 | JVM_OPTS="-Dfile.encoding=UTF8 -XX:MaxPermSize=512m -Xms512m -Xmx2g -XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC"; export JVM_OPTS 4 | 5 | ./sbt -Dsbt.log.noformat=true "; clean; update; test-only -- xonly junitxml console -exclude MatrixMultiplication -include hadoop -- scoobi !inmemory.times.verbose.all.[[^(Tracker)]|scoobi]" 6 | -------------------------------------------------------------------------------- /src/test/avro/SampleSchema.avsc: -------------------------------------------------------------------------------- 1 | {"type": "record", 2 | "name": "SampleRecord", 3 | "namespace": "com.nicta.scoobi.testavroschema", 4 | "doc": "A sample record.", 5 | "fields": [ 6 | {"name": "str", "type": ["string", "null"]}, 7 | {"name": "num", "type": "int"}, 8 | 
{"name": "hash", "type": {"type": "fixed", "size": 20, "name": "Sha1"}} 9 | ] 10 | } 11 | -------------------------------------------------------------------------------- /bin/ci-hadoop2: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # CI hadoop2 build: overrides the sbt version, then runs test-only with "-include hadoop". Inner quotes are escaped so sbt receives the version as a string literal. 3 | JVM_OPTS="-Dfile.encoding=UTF8 -XX:MaxPermSize=512m -Xms512m -Xmx2g -XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC"; export JVM_OPTS 4 | 5 | ./sbt -Dsbt.log.noformat=true ";set version := \"0.8.0-cdh4-SNAPSHOT\"; test-only -- junitxml console -exclude MatrixMultiplication -include hadoop -- scoobi !inmemory.times.verbose.all.[[^(Tracker)]|scoobi]" 6 | -------------------------------------------------------------------------------- /bin/ci-cluster: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # CI cluster build: overrides the sbt version, then runs test-only with "-include hadoop" and the cluster scoobi arguments. Inner quotes are escaped so sbt receives the version as a string literal. 3 | JVM_OPTS="-Dfile.encoding=UTF8 -XX:MaxPermSize=512m -Xms512m -Xmx2g -XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC"; export JVM_OPTS 4 | 5 | ./sbt -Dsbt.log.noformat=true ";set version := \"0.9.0-cdh4-SNAPSHOT\";test-only -- junitxml console -exclude MatrixMultiplication -include hadoop -- scoobi deletelibjars.!local.!inmemory.cluster.times.verbose.all.[[^(Tracker|Client)]|scoobi]" 6 | -------------------------------------------------------------------------------- /examples/pageRank/build.sbt: -------------------------------------------------------------------------------- 1 | name := "PageRank" 2 | 3 | version := "1.0" 4 | 5 | scalaVersion := "2.10.2" 6 | 7 | libraryDependencies ++= 8 | Seq("com.nicta" %% "scoobi" % "0.7.2") 9 | 10 | scalacOptions ++= Seq("-deprecation") 11 | 12 | resolvers ++= Seq("sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots", 13 | "cloudera" at "https://repository.cloudera.com/content/repositories/releases") 14 | -------------------------------------------------------------------------------- /examples/README.md: 
-------------------------------------------------------------------------------- 1 | Examples 2 | -------- 3 | 4 | These examples (specifically wordCount) are designed to help you quickly get started with Scoobi. Each directory contains the required files and directory structure to create your own Scoobi application, so it can be used as a good starting point. 5 | 6 | For more information on building and running, see Scoobi's user guide. `src/test/scala/com/nicta/scoobi/acceptance` is a good spot to look if you're interested in seeing various uses of Scoobi. 7 | -------------------------------------------------------------------------------- /notes/0.8.1.markdown: -------------------------------------------------------------------------------- 1 | Maintenance version 2 | 3 | ### Fixes 4 | 5 | * found a different way to create avro record writers for both cdh4 and cdh5 6 | * added the yarn-site.xml file to the list of configuration files for cdh5/hadoop2 7 | * set the framework.name property for yarn when executing on a cluster 8 | * various fixes to `TextFilePartitionedSink` 9 | * optimised the reading of input splits when using a `DList.tabulate` function 10 | * fixed the caching of metadata 11 | 12 | ------ 13 | -------------------------------------------------------------------------------- /examples/wordCount/build.sbt: -------------------------------------------------------------------------------- 1 | name := "ScoobiWordCount" 2 | 3 | version := "1.0" 4 | 5 | scalaVersion := "2.10.2" 6 | 7 | scalacOptions ++= Seq("-deprecation") 8 | 9 | libraryDependencies ++= Seq("com.nicta" %% "scoobi" % "0.7.2") 10 | 11 | resolvers ++= Seq("sonatype releases" at "http://oss.sonatype.org/content/repositories/releases", 12 | "sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots", 13 | "cloudera" at "https://repository.cloudera.com/content/repositories/releases") 14 | -------------------------------------------------------------------------------- 
/notes/header.txt: -------------------------------------------------------------------------------- 1 | Copyright 2011,2012 National ICT Australia Limited 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | #use a container-based infrastructure 2 | sudo: false 3 | 4 | # These directories are cached to S3 at the end of the build 5 | cache: 6 | directories: 7 | - $HOME/.ivy2/cache 8 | - $HOME/.sbt/boot/ 9 | 10 | script: 11 | # Your normal script 12 | - sbt ++$TRAVIS_SCALA_VERSION -J-XX:ReservedCodeCacheSize=256M 'test-only -- -include hadoop -exclude unstable' 13 | 14 | # Tricks to avoid unnecessary cache updates 15 | - find $HOME/.sbt -name "*.lock" | xargs rm 16 | - find $HOME/.ivy2 -name "ivydata-*.properties" | xargs rm 17 | 18 | language: scala 19 | 20 | scala: 21 | - 2.11.4 22 | 23 | jdk: 24 | - oraclejdk7 25 | 26 | -------------------------------------------------------------------------------- /examples/scoobding/build.sbt: -------------------------------------------------------------------------------- 1 | name := "scoobding" 2 | 3 | version := "1.0" 4 | 5 | scalaVersion := "2.10.2" 6 | 7 | scalacOptions ++= Seq("-deprecation") 8 | 9 | libraryDependencies ++= 10 | Seq("com.nicta" %% "scoobi" % "0.7.2", 11 | "org.scala-lang" % "scala-swing" % "2.9.2", 12 | "jfree" % 
"jfreechart" % "1.0.13", 13 | "jfree" % "jcommon" % "1.0.16", 14 | "cc.co.scala-reactive" %% "reactive-core" % "0.3.0") 15 | 16 | 17 | resolvers ++= Seq("sonatype snapshots" at "http://oss.sonatype.org/content/repositories/snapshots", 18 | "cloudera" at "https://repository.cloudera.com/content/repositories/releases") 19 | -------------------------------------------------------------------------------- /examples/avro/README.md: -------------------------------------------------------------------------------- 1 | # Avro Java Code Generator 2 | 3 | ## To compile, run: 4 | 5 | $ sbt compile 6 | 7 | ## To generate the Weather.java source from src/main/avro/weather.avsc: 8 | 9 | $ sbt avro:generate 10 | 11 | Afterwards, look at the auto-generated file: 12 | 13 | $ cat ./target/scala-2.10/src_managed/main/compiled_avro/test/Weather.java 14 | 15 | ## To run the sample AvroExample.scala code that uses the generated Weather.java file 16 | 17 | $ sbt "run-main com.nicta.scoobi.examples.AvroExample" 18 | $ ls -l avro-joined-output/ # output of the avro file 19 | $ cat avro-joined-output2/out-m-00000 # output of the JSON 20 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.0") 2 | 3 | addSbtPlugin("com.typesafe.sbt" % "sbt-pgp" % "0.8.1") 4 | 5 | addSbtPlugin("com.typesafe.sbt" % "sbt-site" % "0.6.2") 6 | 7 | addSbtPlugin("com.typesafe.sbt" % "sbt-ghpages" % "0.5.1") 8 | 9 | addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "0.5.1") 10 | 11 | addSbtPlugin("com.github.gseitz" % "sbt-release" % "0.7.1") 12 | 13 | addSbtPlugin("me.lessis" % "ls-sbt" % "0.1.3") 14 | 15 | addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6") 16 | 17 | addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "0.2.1") 18 | 19 | addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.3.1") 
-------------------------------------------------------------------------------- /notes/0.8.4.markdown: -------------------------------------------------------------------------------- 1 | Maintenance version 2 | 3 | ### Improvements 4 | 5 | * allow output files to be directly created during the mapping phase if possible 6 | * creating a _SUCCESS file in the output directory only if all Hadoop jobs are successful for a given Scoobi job. Otherwise there is only a _SUCCESS_JOB file 7 | * added support for partitioned sequence files 8 | * better implicit search for faster compilation times (thanks to @retronym) 9 | * compression codecs are not being used if they can't be instantiated for a given platform instead of throwing exceptions 10 | * created separate jars for cdh3, cdh4, cdh5, hadoop2 compatibility, instead of a Compatibility class using reflection. With cdh4/cdh5, the files of a partitioned output are 11 | being moved with a single rename 12 | 13 | 14 | ------ 15 | -------------------------------------------------------------------------------- /notes/0.7.2.markdown: -------------------------------------------------------------------------------- 1 | This release contains several important fixes 2 | 3 | ### Fixes 4 | 5 | * [#279, #280](https://github.com/NICTA/scoobi/issues/279): StackOverflow when printing a large job 6 | * [#281, #282](https://github.com/NICTA/scoobi/issues/282): some operations are executed more than once 7 | * [#283](https://github.com/NICTA/scoobi/issues/283): fromTextFile with empty list can cause incorrect input to be processed 8 | * [#285, #286](https://github.com/NICTA/scoobi/issues/285): DList#materialise creates file handles eagerly, not lazily 9 | * [#287](https://github.com/NICTA/scoobi/issues/287): #HADOOP-9746 issue workaround 10 | 11 | And the release process has been simplified 12 | 13 | ### Packaging 14 | 15 | * there is now only one jar `scoobi-0.7.2.jar` whether using CDH3 or CDH4 16 | 17 | 18 | ------ 19 | 
-------------------------------------------------------------------------------- /src/main/java/com/nicta/scoobi/testavroschema/Sha1.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Autogenerated by Avro 3 | * 4 | * DO NOT EDIT DIRECTLY 5 | */ 6 | package com.nicta.scoobi.testavroschema; 7 | @SuppressWarnings("all") 8 | @org.apache.avro.specific.FixedSize(20) 9 | @org.apache.avro.specific.AvroGenerated 10 | public class Sha1 extends org.apache.avro.specific.SpecificFixed { 11 | public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"fixed\",\"name\":\"Sha1\",\"namespace\":\"com.nicta.scoobi.testavroschema\",\"size\":20}"); 12 | public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 13 | 14 | /** Creates a new Sha1 */ 15 | public Sha1() { 16 | super(); 17 | } 18 | 19 | /** Creates a new Sha1 with the given bytes */ 20 | public Sha1(byte[] bytes) { 21 | super(bytes); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/application/Levels.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package application 18 | 19 | object Levels { 20 | 21 | case class Level(level: String) extends AnyVal 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala-2.10/com/nicta/scoobi/reflect/internal/util.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi.reflect.internal 17 | 18 | object util { 19 | type AbstractFileClassLoader = tools.nsc.interpreter.AbstractFileClassLoader 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala-2.11/com/nicta/scoobi/reflect/internal/util.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi.reflect.internal 17 | 18 | object util { 19 | type AbstractFileClassLoader = scala.reflect.internal.util.AbstractFileClassLoader 20 | } 21 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/EventSourceAction.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package gui 17 | 18 | import swing._ 19 | import reactive.Trigger 20 | 21 | abstract class EventSourceAction(title: String) extends Action(title) with Trigger { 22 | def apply { trigger } 23 | } -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/LeftMenuBar.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package gui 17 | 18 | import scala.swing._ 19 | 20 | case class LeftMenuBar(components: Component*) extends MenuBar { 21 | contents += new FlowPanel(FlowPanel.Alignment.Left)(components:_*) 22 | } 23 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/rtt/RuntimeClass.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package rtt 19 | 20 | /** A class representing a class that has been generated at run-time. 
*/ 21 | final case class RuntimeClass(name: String, clazz: Class[_], bytecode: Array[Byte]) 22 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/TablePanel.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package gui 17 | 18 | import swing._ 19 | 20 | case class TablePanel(table: Table) extends BoxPanel(Orientation.Vertical) { 21 | contents += table 22 | border = Swing.EtchedBorder(Swing.Lowered) 23 | } 24 | -------------------------------------------------------------------------------- /notes/0.9.0.markdown: -------------------------------------------------------------------------------- 1 | Maintenance version 2 | 3 | ### Improvements 4 | 5 | * use Scalaz 7.1.0 (by @charlesofarrell and @markhibberd) 6 | * Fix String cast to Level in LogFactory (by @charlesofarrell) 7 | * update to Scala 2.11 (by @charlesofarrell) 8 | * catching verification errors in seq sources 9 | * fuse of sequences sources when appending lots of DLists (to avoid possible serialization issues) 10 | * added a `Short` wire format [#327](http://github.com/NICTA/scoobi/issues/327) (by @raronson) 11 | * added a wire format for Thrift [#351](http://github.com/NICTA/scoobi/issues/351) 12 | * added a WireFormat for `\/` 
13 | * create the scoobi tmp jar in the scoobi temp dir 14 | 15 | ### Fixes 16 | 17 | * EMR/S3 fixes (by Kevin X Chang) [#344](http://github.com/NICTA/scoobi/issues/344) 18 | * don't move files which are already in the right place 19 | * update the job counters as soon as the job is finished and propagate to the ScoobiConfiguration 20 | 21 | ------ 22 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/reactive/Trigger.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package reactive 17 | 18 | /** 19 | * This event source triggers empty values just to say that "something" happens 20 | */ 21 | trait Trigger extends EventStreamSourceProxy[Unit] { 22 | def trigger = source.fire(()) 23 | } -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/testing/UnitSpecification.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package testing 18 | 19 | import org.specs2.Specification 20 | import org.specs2.specification._ 21 | 22 | abstract class UnitSpecification extends Specification { 23 | override def map(fs: =>Fragments) = section("unit") ^ fs 24 | } 25 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | ==== 2 | Copyright 2011,2012,2013,2014 National ICT Australia Limited 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | ==== 16 | 17 | (c) Copyright National ICT Australia Limited (NICTA), 2011,2012,2013,2014 18 | 19 | Some files contain other unattributed Contributions to the Work; All Contributions 20 | received from Contributors under the terms of the Apache License Agreement v 2.0 and 21 | re-distributed in accordance with that license. 
22 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/application/Application.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package application 18 | 19 | trait Application { 20 | type ScoobiApp = com.nicta.scoobi.application.ScoobiApp 21 | type ScoobiConfiguration = com.nicta.scoobi.core.ScoobiConfiguration 22 | } 23 | object Application extends Application 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/test/resources/css/specs2-user.css: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | #breadcrumbs { 17 | display: block; 18 | } 19 | 20 | pre { 21 | background-color: #F5F5F5; 22 | margin: 2px; 23 | padding: 5px; 24 | } 25 | 26 | code { 27 | background-color: #F5F5F5; 28 | padding-right: 3px; 29 | padding-left: 3px; 30 | } 31 | 32 | pre code { 33 | padding: 0px; 34 | } 35 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/Images.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package gui 17 | 18 | import javax.swing.ImageIcon 19 | 20 | /** 21 | * Loads icons and images from the classpath resources found under `imagesDir` 22 | */ 23 | object Images { 24 | val imagesDir: String = "/images/" 25 | 26 | /** @return the image held by the icon loaded for `url` (see getIcon) */ 27 | def getImage(url: String): java.awt.Image = getIcon(url).getImage 28 | 29 | /** 30 | * @param url name of the resource, appended to `imagesDir` 31 | * @return the icon created from that classpath resource 32 | * @throws IllegalArgumentException if the resource cannot be found 33 | */ 34 | def getIcon(url: String): ImageIcon = { 35 | // getResource returns null for a missing resource; fail with the offending path instead of an unexplained NullPointerException 36 | val resource = getClass.getResource(imagesDir + url) 37 | require(resource != null, "image resource not found: " + imagesDir + url) 38 | new ImageIcon(resource) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/units/CountUnit.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package units 17 | 18 | /** 19 | * This measure unit is used when counting elements like the number of database rows read 20 | */ 21 | object CountUnit extends MeasureUnit { 22 | override def show: String = "number" 23 | override def toString = "" 24 | val factor = 1L 25 | } -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/testing/mutable/script/NictaHadoop.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi.testing.mutable.script 17 | 18 | abstract class NictaHadoop extends com.nicta.scoobi.testing.script.NictaHadoop with org.specs2.mutable.script.SpecificationLike 19 | 20 | abstract class NictaSimpleJobs extends NictaHadoop with com.nicta.scoobi.testing.SimpleJobs 21 | 22 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/testing/script/UnitSpecification.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package testing 18 | package script 19 | 20 | import org.specs2.specification._ 21 | import script.Specification 22 | 23 | abstract class UnitSpecification extends Specification { 24 | override def map(fs: =>Fragments) = section("unit") ^ super.map(fs) 25 | } 26 | 27 | -------------------------------------------------------------------------------- /src/main/scala-2.10/com/nicta/scoobi/application/ILoopCompat.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi.application 17 | 18 | import scala.tools.nsc.interpreter.ILoop 19 | 20 | // Without using addThunk we run into deadlock issues in 2.10 21 | // There doesn't appear to be a common function between 2.10 and 2.11 that will preserve the same behaviour 22 | trait ILoopCompat extends ILoop 23 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/guide/Index.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package guide 18 | 19 | /** 20 | * This class generates the index.html page used for landing users on http://NICTA.github.com/scoobi 21 | */ 22 | class Index extends ScoobiPage { def is = 23 | "Scoobi".title ^ 24 | "Welcome!" ^ 25 | ReadMe.is ^ 26 | include((new UserGuide).hide) 27 | } 28 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/ScrollableComponent.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package gui 17 | 18 | import swing.{Component, ScrollPane} 19 | 20 | /** 21 | * Adds a `scrollable` method to swing components through an implicit conversion 22 | */ 23 | object ScrollableComponent { 24 | /** the result type is declared explicitly so that the conversion does not rely on inferring a class defined further down */ 25 | implicit def toScrollable(component: Component): ToScrollable = new ToScrollable(component) 26 | class ToScrollable(component: Component) { 27 | /** @return a new ScrollPane created with the component */ 28 | def scrollable: ScrollPane = new ScrollPane(component) 29 | } 30 | } -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/util/Pretty.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package util 19 | 20 | object Pretty { 21 | 22 | def indent(n: Int, s: String): String = { 23 | val lines = s.split("\n") 24 | lines.mkString("\n" + " " * n) 25 | } 26 | 27 | def indent(pre: String, s:String): String = { 28 | pre + indent(pre.length,s) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/units/PercentUnit.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package units 17 | 18 | /** 19 | * This measure unit is used when providing a ratio of elements like the percentage of cache utilization 20 | */ 21 | object PercentUnit extends MeasureUnit { 22 | 23 | override def show: String = "%" 24 | override def toString = "" 25 | 26 | val factor = 1L 27 | } -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/core/GroupingSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package core 18 | 19 | import testing._ 20 | 21 | class GroupingSpec extends UnitSpecification { def is = s2""" 22 | Calling the partition method on Groupings must not trigger a DivByZero exception $e1 23 | """ 24 | 25 | def e1 = Grouping.groupingId[Int].partition(1, 0) must not(throwAn[Exception]) 26 | } 27 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/testing/script/NictaHadoop.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi.testing.script 17 | 18 | import org.specs2.specification.Groups 19 | 20 | abstract class NictaHadoop extends com.nicta.scoobi.testing.NictaHadoop with org.specs2.specification.script.SpecificationLike with Groups 21 | 22 | abstract class NictaSimpleJobs extends NictaHadoop with com.nicta.scoobi.testing.SimpleJobs 23 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/core/UniqueInt.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package core 18 | 19 | import java.util.concurrent.atomic.AtomicInteger 20 | 21 | /** Trait that is sub-classed by objects to provide sets of unique identifiers. 
*/ 22 | trait UniqueInt { 23 | private final val counter = new AtomicInteger 24 | def get: Int = counter.incrementAndGet 25 | } 26 | 27 | object UniqueId extends UniqueInt 28 | -------------------------------------------------------------------------------- /examples/fatjar/src/main/scala/FatJar.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi.examples 17 | 18 | import com.nicta.scoobi.Scoobi._ 19 | 20 | object WordCount extends ScoobiApp { 21 | def run() { 22 | // Nothing to see here, just a barebones scoobi app. 23 | // look at examples/fatjar/README.md for more information 24 | val d = DList("This", "is", "uninteresting") 25 | d.toTextFile("test", overwrite=true).persist 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/testing/mutable/UnitSpecification.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package testing 18 | package mutable 19 | 20 | import org.specs2.mutable.Specification 21 | import application.HadoopLogFactory 22 | 23 | abstract class UnitSpecification extends Specification { 24 | 25 | // to avoid warnings from the Configurations object 26 | HadoopLogFactory.setLogFactory(quiet = true) 27 | 28 | section("unit") 29 | } 30 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/ActionMenuItem.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package gui 17 | 18 | import swing.MenuItem 19 | import reactive._ 20 | import java.awt.Component 21 | 22 | case class ActionMenuItem(a: EventSourceAction) extends MenuItem(a.title) with Trigger { 23 | override def self: Component with EventStream[Unit] = this.asInstanceOf[Component with EventStream[Unit]] 24 | 25 | action = a 26 | a foreach { doIt => trigger } 27 | } -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/report/Report.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package report 17 | 18 | import measure._ 19 | 20 | case class Report(name: String = "", 21 | measured: String = "", 22 | unit: String = "", 23 | results: Measurement = Measurement(), 24 | recordsNumber: Long = 0) { 25 | def startTime = results.startTime 26 | def endTime = results.endTime 27 | } 28 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/core/Environment.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package core 18 | 19 | import org.apache.hadoop.conf.Configuration 20 | 21 | /** 22 | * An object holder which can hold a distributed value 23 | */ 24 | trait Environment { 25 | /** push a value so that it's available to distributed processes */ 26 | def push(env: Any)(implicit configuration: Configuration): Unit 27 | /** get a distributed value */ 28 | def pull(implicit configuration: Configuration): Any 29 | 30 | } -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/control/SystemProperties.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package control 19 | 20 | /** 21 | * This trait is introduced to facilitate testing when accessing system properties 22 | */ 23 | private[scoobi] 24 | trait SystemProperties { 25 | def getEnv(name: String): Option[String] = Option(System.getenv(name)) 26 | def get(name: String): Option[String] = sys.props.get(name) 27 | } 28 | 29 | private[scoobi] 30 | object SystemProperties extends SystemProperties 31 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/reactive/Signalx.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package reactive 17 | 18 | /** 19 | * Implicit conversions used together with the Signalx operations 20 | */ 21 | object Signalx { 22 | /** wrap a by-name value in a Var */ 23 | implicit def toVar[T](v: =>T): Var[T] = Var(v) 24 | /** add the Signalx operations to a Signal; the result type is declared explicitly instead of being inferred */ 25 | implicit def toSignalx[T](s: Signal[T]): Signalx[T] = new Signalx[T](s) 26 | } 27 | 28 | /** 29 | * Additional operations on a Signal 30 | */ 31 | case class Signalx[T](s: Signal[T]) extends Observing { 32 | /** 33 | * @param ev events on which the current value of the signal is read again 34 | * @return the changes of the re-read value combined (with `|`) with the changes of the signal itself 35 | */ 36 | def changeIf[S](ev: EventStream[S]) = { 37 | val changed = Var(s.now) 38 | ev.foreach { _ => 39 | changed.update(s.now) 40 | } 41 | changed.change | s.change 42 | } 43 | } -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/measure/StringKey.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package measure 17 | 18 | import scalaz.Order 19 | import scalaz.std.string._ 20 | 21 | /** 22 | * Type of strings which can be used for keys 23 | */ 24 | case class StringKey(s: String) { 25 | override def toString = s 26 | } 27 | 28 | object StringKey { 29 | implicit def stringKeyOrder: Order[StringKey] = new Order[StringKey] { 30 | def order(x: StringKey, y: StringKey) = Order[String].order(x.s, y.s) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/testing/mutable/HadoopSpecification.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/**
 * Hadoop specification trait combining the scoobi `testing.HadoopSpecificationLike` trait
 * with the specs2 *mutable* `SpecificationLike` trait
 */
trait HadoopSpecificationLike
  extends com.nicta.scoobi.testing.HadoopSpecificationLike
  with SpecificationLike

/**
 * Abstract class version of the trait above, for specifications which prefer to extend a class
 */
abstract class HadoopSpecification extends HadoopSpecificationLike
package object thrift {
  import org.apache.thrift.{TBase, TFieldIdEnum}

  /** any Thrift object: a `TBase` parameterised by some `TBase` and some `TFieldIdEnum` */
  type ThriftLike = TBase[_ <: TBase[_, _], _ <: TFieldIdEnum]

  /**
   * WireFormat for any type `A` for which there is evidence that it is a `ThriftLike`.
   * The instance is built by `ThriftSchema.mkThriftFmt`
   */
  implicit def ThriftWireFormat[A](implicit m: Manifest[A], ev: A <:< ThriftLike): WireFormat[A] =
    ThriftSchema.mkThriftFmt[A]

  /**
   * SeqSchema for any type `A` for which there is evidence that it is a `ThriftLike`.
   * The instance is built by `ThriftSchema.mkThriftSchema`
   */
  implicit def ThriftSeqSchema[A](implicit m: Manifest[A], ev: A <:< ThriftLike): SeqSchema[A] =
    ThriftSchema.mkThriftSchema[A]
}
/**
 * Compatibility layer for the Scala 2.11 interpreter loop.
 *
 * The 2.10 version relies on `addThunk` to avoid deadlocks, and no function common to
 * 2.10 and 2.11 appears to preserve the same behaviour, hence this version-specific trait.
 */
trait ILoopCompat extends ILoop {

  /**
   * Register some code to be executed with the interpreter initialization.
   *
   * Evaluating `f` directly would also work, but then the scoobi> prompt would be
   * displayed immediately after "press Enter".
   */
  def addThunk(f: => Unit): Unit = {
    intp.initialize(f)
  }
}
/**
 * A named set of measures taken over a given time range
 *
 * @param name      name of the measurement (empty by default)
 * @param timeRange time range for the measures (a default `DaytimeRange()` if not specified)
 * @param measures  the measures themselves (none by default)
 */
case class Measurement(name: String = "", timeRange: DaytimeRange = DaytimeRange(), measures: Seq[Measure[_]] = Seq()) {

  /** start time of the time range */
  def startTime = timeRange.startTime

  /** end time of the time range */
  def endTime = timeRange.endTime

  /** only used for testing: return a copy where `increment(i)` has been called on each measure */
  def increment(i: Long) = {
    val incremented: Seq[Measure[_]] = measures.map(measure => measure.increment(i))
    copy(measures = incremented)
  }
}
-------------------------------------------------------------------------------- 1 | This is a maintenance release for Scoobi 2 | 3 | ### Improvements 4 | 5 | * improved the diagnostic messages for the existence of configuration files 6 | * added a glob pattern for the cat command in the repl 7 | * checking read/write permissions [#189](https://github.com/NICTA/scoobi/issues/189), pull request by Jeff Zhang 8 | * made the Scoobi REPL `ls`/`cat` support multiple filesystems 9 | * added `avrocat` to the Scoobi REPL 10 | * improved the naming of the Scoobi job and the individual MapReduce jobs ("steps") 11 | * a DObject can be directly read from a path without triggering a MapReduce job [#288](https://github.com/NICTA/scoobi/issues/288) 12 | * added `repl:assembly` to build a fat jar for scoobi-repl (and `repl:dist` to build a tar ball containing the script and fat jar) [#292](https://github.com/NICTA/scoobi/issues/292) 13 | * added a `reduceValues` method to avoid using combiners after a `groupByKey` 14 | 15 | ### Fixes 16 | 17 | * prevented accidental division by zero errors in `TaggedPartitioners` and when composing `Grouping` instances 18 | * made the cat and avrocat commands lazy [#293](https://github.com/NICTA/scoobi/issues/293) 19 | * getting the proper file system for deleting a path [#295](https://github.com/NICTA/scoobi/issues/295) 20 | 21 | ------ 22 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/LabeledFieldPanel.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/**
 * Horizontal panel containing a label followed by a text field.
 * The text field displays the value of a signal and is refreshed on each change of that signal
 *
 * @param label    text of the label (a space is appended to it)
 * @param text     signal providing the content of the text field
 * @param editable true if the text field can be edited (false by default)
 */
class LabeledFieldPanel(label: String, text: Signal[String], editable: Boolean = false) extends BoxPanel(Horizontal) with Observing { panel =>
  // initialised with the value of the signal at the time of the first access
  protected lazy val textField = new TextField(text.now) { editable = panel.editable }

  contents ++= Seq(new Label(label+" "), textField)
  border = Swing.EtchedBorder(Swing.Lowered)

  // keep the displayed text in sync with the signal
  text.change.foreach { newText => textField.text = newText }
}
/**
 * This trait provides utility methods for functions: predicates (`A => Boolean`)
 * can be combined with the usual logical operators
 */
private[scoobi]
trait Functions {
  /** implicit conversion adding the logical operators to any predicate */
  implicit def logicalFunction[A](f: A => Boolean): LogicalFunction[A] = LogicalFunction(f)

  /**
   * Wrapper for a predicate `f`, providing combinators which return new predicates
   */
  case class LogicalFunction[A](f: A => Boolean) {
    /** @return a predicate which is true if either `f` or `g` is true. `g` is not evaluated if `f` is true */
    def ||(g: A => Boolean): A => Boolean = (a: A) => f(a) || g(a)

    /** @return a predicate which is true if both `f` and `g` are true. `g` is not evaluated if `f` is false */
    def &&(g: A => Boolean): A => Boolean = (a: A) => f(a) && g(a)

    /** @return the negation of `f` */
    def unary_! : A => Boolean = (a: A) => !f(a)
  }
}
private[scoobi]
object Functions extends Functions
/**
 * Trigger firing an event when the file designated by a path has been modified
 *
 * @param path  signal holding the path of the file to check; its current value is read on every tick
 * @param delay argument passed to the underlying `Timer` (500 by default)
 */
class FilePoller(path: Signal[String], delay: Long = 500) extends Trigger {

  // modification time seen at the last event (or at construction time)
  private var previousLastModified = new File(path.now).lastModified()

  // NOTE(review): this value is the result of `foreach`, not the `Timer` itself - confirm that no caller needs the timer
  val timer = new Timer(0, delay, {t => false}) foreach { tick =>
    // read the modification time only once per tick, so that the value which is compared
    // is also the value which is stored, even if the file changes in the meantime
    val newLastModified = new File(path.now).lastModified()
    // `lastModified` returns 0 when the file does not exist or can not be read:
    // in that case an event is fired on every tick
    if (newLastModified > previousLastModified || newLastModified == 0) {
      previousLastModified = newLastModified
      source.fire(())
    }
  }

}
/**
 * This trait represents all memory units. Each unit defines a conversion `factor`,
 * where each unit is 1000 times the previous one.
 *
 * NOTE(review): the factors below (1000 for Kb) look like a number of bytes although this
 * comment used to mention a conversion from megabytes - confirm the base unit
 */
sealed trait MemoryUnit extends MeasureUnit

/** Kilobytes, displayed as "Kb", with a factor of 1000 */
object Kilobytes extends MemoryUnit {
  override def toString = "Kb"
  val factor = 1000L
}

/** Megabytes, displayed as "Mb": 1000 times the Kilobytes factor */
object Megabytes extends MemoryUnit {
  override def toString = "Mb"
  val factor = 1000 * Kilobytes.factor
}

/** Gigabytes, displayed as "Gb": 1000 times the Megabytes factor */
object Gigabytes extends MemoryUnit {
  override def toString = "Gb"
  val factor = 1000 * Megabytes.factor
}
/**
 * Testing trait mixing `ShowNode` and `CompNodes`, with a ScalaCheck `Arbitrary` instance for itself
 */
trait MscrAttributes extends ShowNode with CompNodes {
  /** a new anonymous instance of this trait */
  def mscrAttributes: MscrAttributes = new MscrAttributes {}

  /** Arbitrary instance generating values with the constant generator applied to `mscrAttributes` */
  implicit def mscrAttributesArbitrary: Arbitrary[MscrAttributes] =
    Arbitrary { Gen.const(mscrAttributes) }

  /** instances are displayed as an empty string */
  override def toString = ""
}
/**
 * Specification for the naming of the result directories of a sink
 */
class ChannelOutputFormatSpec extends UnitSpecification {
  "Channels determine result files for a given job run" >> {
    "ch1-2/ is a result directory for a sink with tag 1 and sink id 2" >> {
      val sink = TextFileSink(".")
      // the directory name is made of the tag (1) and the actual id of the sink
      val resultDirectory = new Path(s"ch1-${sink.id}/")
      sink.isSinkResult(1)(resultDirectory)
    }
  }

}
/**
 * A `java.io.DataOutput` discarding everything which is written to it:
 * every method returns immediately without doing anything
 */
object NullDataOutput extends DataOutput {
  def write(b: Int): Unit = ()
  def write(b: Array[Byte]): Unit = ()
  def write(b: Array[Byte], off: Int, len: Int): Unit = ()
  def writeBoolean(v: Boolean): Unit = ()
  def writeByte(v: Int): Unit = ()
  def writeShort(v: Int): Unit = ()
  def writeChar(v: Int): Unit = ()
  def writeInt(v: Int): Unit = ()
  def writeLong(v: Long): Unit = ()
  def writeFloat(v: Float): Unit = ()
  def writeDouble(v: Double): Unit = ()
  def writeBytes(s: String): Unit = ()
  def writeChars(s: String): Unit = ()
  def writeUTF(s: String): Unit = ()
}
/**
 * Specification for the `updateWith` operation brought in scope by `Maps._`
 */
class MapsSpec extends UnitSpecification {

  "A mutable map can be updated with keys from another map and a partial function to select the new keys to be added" >> {
    val initial   = mutable.Map(1 -> "1", 2 -> "2")
    val additions = Map(3 -> "3", 4 -> "4")

    // the partial function transforms the values which are added
    val updated = initial.updateWith(additions) { case (key, value) => (key, "got: " + value) }

    updated must_== mutable.Map(1 -> "1", 2 -> "2", 3 -> "got: 3", 4 -> "got: 4")
  }

}
/**
 * Specification for `TempFiles.relativePath`, using a specs2 DataTable
 */
class TempFilesSpec extends UnitSpec with DataTables {

  "A path can be calculated relatively to an existing directory" >> {
    // each row is: the existing directory, the path to relocate, the expected result
    "directory"    || "path"                         || "relative"                 |>
    "/var/temp/d1" !! "/user/me/temp/d1/1/hello.txt" !! "/var/temp/d1/1/hello.txt" | { (baseDirectory, fullPath, expected) =>
      relativePath(new File(baseDirectory), fullPath) === expected
    }
  }

}
/**
 * Implicit `Ordering` instances for some Hadoop writable types.
 * Each instance delegates to the `compareTo` method of the type
 */
trait Orderings {
  /** ordering for `Text` values, based on `Text.compareTo` */
  implicit def TextOrdering: Ordering[Text] = new Ordering[Text] {
    def compare(x: Text, y: Text): Int = x.compareTo(y)
  }

  /** ordering for `BytesWritable` values, based on `BytesWritable.compareTo` */
  implicit def BytesOrdering: Ordering[BytesWritable] = new Ordering[BytesWritable] {
    def compare(x: BytesWritable, y: BytesWritable): Int = x.compareTo(y)
  }

  /** ordering for `DoubleWritable` values, based on `DoubleWritable.compareTo` */
  implicit def DoubleOrdering: Ordering[DoubleWritable] = new Ordering[DoubleWritable] {
    def compare(x: DoubleWritable, y: DoubleWritable): Int = x.compareTo(y)
  }
}
object Orderings extends Orderings
/**
 * Change the cursor of a UI element while an action is in progress:
 * a custom cursor is set when the action starts and a default cursor when it finishes
 *
 * @param parent           the element whose cursor is changed
 * @param actionInProgress stream of `Started` / `Finished` events
 */
case class WaitCursor(parent: UIElement)(implicit actionInProgress: EventStream[ActionInProgress]) extends Observing {
  // custom cursor created from the clock icon image, only built when first needed
  private lazy val waitCursor = {
    val hotSpot = new Point(0,0)
    Toolkit.getDefaultToolkit.createCustomCursor(getImage("clock-icon.png"), hotSpot, "cursor")
  }

  actionInProgress.foreach {
    case Started()  => parent.cursor = waitCursor
    case Finished() => parent.cursor = new Cursor(Cursor.DEFAULT_CURSOR)
  }
}
/**
 * Specification for `TextOutput.anyToString`, used to write delimited text files
 */
class TextOutputSpec extends UnitSpecification { def is = s2"""

 For delimited text files we need a toString method working on any kind of Product:
   Tuples $e1
   case class $e2
   List $e3
   Option $e5
                                                                                """

  /** case class used for the second example */
  case class A(i: Int, j: Int, k: Int)

  // the separator used for all the examples and the expected output for 3 values
  private final val Separator = "|"
  private final val Delimited = "1|2|3"

  def e1 = anyToString((1, 2, 3), Separator) === Delimited
  def e2 = anyToString(A(1, 2, 3), Separator) === Delimited
  def e3 = anyToString(List(1, 2, 3), Separator) === Delimited
  // a single value is displayed without any separator
  def e5 = anyToString(Option(1), Separator) === "1"
}
* you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package units 17 | 18 | import scala.Predef._ 19 | import java.text.SimpleDateFormat 20 | import java.util.{TimeZone, Calendar, Date} 21 | import Calendar._ 22 | import time._ 23 | 24 | /** 25 | * This trait represents all time units + means of conversion from millis 26 | */ 27 | sealed trait TimeUnit extends MeasureUnit 28 | 29 | object Seconds extends TimeUnit { 30 | val factor = 1000L 31 | override def toString = "seconds" 32 | } 33 | 34 | object Millis extends TimeUnit { 35 | val factor = 1L 36 | override def toString = "millis" 37 | } 38 | 39 | object EmptyUnit extends MeasureUnit { 40 | override def toString = "" 41 | val factor = 1L 42 | } 43 | -------------------------------------------------------------------------------- /notes/0.4.0.markdown: -------------------------------------------------------------------------------- 1 | ### New Features 2 | 3 | * ScoobiApp trait for making scoobi apps even easier 4 | * Single import `import com.nicta.scoobi.Scoobi._` 5 | * IO support for avro 6 | * Sequence files automatically convert to/from writable 7 | * [Distributed Objects](http://nicta.github.com/scoobi/Distributed Objects.html) and DList reduction methods (e.g. reduce, product, min, etc.) 
8 | * [Testing support](http://nicta.github.com/scoobi/guide/Testing%20guide.html#Testing+guide) 9 | * [Matrix and vector](http://nicta.github.com/scoobi/guide/Extensions.html) extensions 10 | 11 | 12 | ### Changes 13 | 14 | * All methods of Scoobi object are now in Conf object 15 | * `DList.use(x: DObject)` has been removed as part of a complete redesign of Distributed Objects 16 | * To avoid hiding the scala types, the TextInput pattern matcher helpers have been renamed: Int to AnInt, Float to AFloat, Double to ADouble, Long to ALong. 17 | 18 | ### Improvements 19 | 20 | * New [website](http://nicta.github.com/scoobi) and [User Guide](http://nicta.github.com/scoobi/guide/User%20Guide.html#User+Guide) 21 | * Lots of bug fixes 22 | * Improvements in optimiser 23 | * MSCR now imposes less overhead on objects that need no fusion 24 | * Join/coGroup API cleaned up, and performance improved 25 | * Better support for other filesystems such as S3 26 | * Refactoring of data input and output APIs making user extensions easier to add 27 | 28 | ------ 29 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/acceptance/NumberPartitionerSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package acceptance 18 | 19 | import Scoobi._ 20 | import org.specs2.matcher.Matcher 21 | 22 | import testing.mutable.NictaSimpleJobs 23 | 24 | class NumberPartitionerSpec extends NictaSimpleJobs { 25 | 26 | "Numbers can be partitioned into even and odd numbers" >> { implicit sc: SC => 27 | val numbers = DList((1 to count).map(i => r.nextInt(count * 2)):_*) 28 | val (evens, odds) = numbers.partition(_ % 2 == 0) 29 | 30 | forall(evens.run)(i => i must beEven) 31 | forall(odds.run)(i => i must beOdd) 32 | } 33 | 34 | val r = new scala.util.Random 35 | val count = 5 36 | 37 | def beEven: Matcher[Int] = (i: Int) => (i % 2 == 0, i + " is not even") 38 | def beOdd = beEven.not 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/testing/UploadedLibJars.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package testing 18 | 19 | import org.specs2.specification._ 20 | import application.ScoobiUserArgs 21 | 22 | /** 23 | * This trait can be mixed in a Specification to automatically add a setup step uploading the library jars to the cluster 24 | */ 25 | trait UploadedLibJars extends SpecificationStructure with HadoopExamples with ScoobiUserArgs { 26 | 27 | /** 28 | * add a first step to upload the library jars before doing anything else 29 | */ 30 | override def map(fs: =>Fragments) = fs.insert(uploadStep) 31 | 32 | /** create a Step to upload the jars on the cluster */ 33 | def uploadStep = Step(if (!isLocalOnly && !noLibJars) uploadLibJarsFiles(deleteLibJarsFirst = deleteLibJars)(cluster.configuration)) 34 | 35 | } -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/DependenciesSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | 18 | import org.specs2.specification.Analysis 19 | import testing.mutable.UnitSpecification 20 | 21 | class DependenciesSpec extends UnitSpecification with Analysis { 22 | val application = 23 | layers( 24 | "testing lib", 25 | "application", 26 | "io io.seq") 27 | 28 | val implementation = 29 | layers( 30 | "exec", 31 | "plan plan.source plan.comp plan.mscr mapreducer", 32 | "time reflect rtt", 33 | "util control text collection monitor").withPrefix("impl") 34 | 35 | val core = 36 | layers("core") 37 | 38 | eg { 39 | Layers(application.layers ++ 40 | implementation.layers ++ 41 | core.layers ).withPrefix("com.nicta.scoobi") must beRespected 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/LabeledComboBoxPanel.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package gui 17 | 18 | import swing._ 19 | import event.SelectionChanged 20 | import Orientation._ 21 | import reactive.{EventStream} 22 | import java.awt.Component 23 | import reactive.EventStreamSourceProxy 24 | 25 | class LabeledComboBoxPanel[A](label: String, values: Seq[A]) extends BoxPanel(Horizontal) with EventStreamSourceProxy[String] { 26 | override def self: Component with EventStream[String] = this.asInstanceOf[Component with EventStream[String]] 27 | 28 | protected lazy val combo = new ComboBox(values) 29 | 30 | contents += new Label(label+" ") 31 | contents += combo 32 | border = Swing.EtchedBorder(Swing.Lowered) 33 | 34 | combo.reactions += { 35 | case SelectionChanged(c) => source.fire(combo.selection.item.toString) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/io/thrift/ThriftSchemaSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi.io.thrift 17 | 18 | import java.io._ 19 | 20 | import com.nicta.scoobi.Scoobi._ 21 | import org.specs2.ScalaCheck 22 | import org.specs2.mutable.Specification 23 | 24 | class ThriftSchemaSpec extends Specification with ScalaCheck { 25 | 26 | "WireFormat bidirectional" >> prop((s: String) => { 27 | implicit val wf = implicitly[WireFormat[MyThrift]] 28 | val a = new MyThrift(s) 29 | val out = new ByteArrayOutputStream() 30 | wf.toWire(a, new DataOutputStream(out)) 31 | wf.fromWire(new DataInputStream(new ByteArrayInputStream(out.toByteArray))) ==== a 32 | }) 33 | 34 | "SeqSchema bidirectional" >> prop((s: String) => { 35 | implicit val ss = implicitly[SeqSchema[MyThrift]] 36 | val a = new MyThrift(s) 37 | ss.fromWritable(ss.toWritable(a)) ==== a 38 | }) 39 | } -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/testing/HadoopSpecification.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package testing 18 | 19 | import org.specs2.Specification 20 | import application.ScoobiAppConfiguration 21 | 22 | /** 23 | * Hadoop specification with an acceptance specification 24 | */ 25 | abstract class HadoopSpecification extends HadoopSpecificationLike 26 | 27 | trait HadoopSpecificationLike extends Specification with HadoopSpecificationStructure with ScoobiAppConfiguration { 28 | // this configuration object needs to be explicit (rather than implicit) 29 | // otherwise it will clash with the implicit sc: ScoobiConfiguration declaration that's used for each example 30 | // this configuration object is used by the ClusterConfiguration trait to determine the settings for fs/jobTracker 31 | override lazy val configuration = super[ScoobiAppConfiguration].configuration 32 | } 33 | 34 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/testing/mutable/NictaHadoop.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package testing 18 | package mutable 19 | 20 | import org.specs2.mutable.Tags 21 | import core.ScoobiConfiguration 22 | import org.specs2.specification.Fragments 23 | 24 | /** 25 | * This trait can be used to create Hadoop specifications on the NictaCluster 26 | */ 27 | trait NictaHadoop extends 28 | mutable.HadoopSpecification with 29 | Tags with 30 | NictaCluster { 31 | 32 | /**this type alias makes it shorter to pass a new configuration object to each example */ 33 | type SC = ScoobiConfiguration 34 | 35 | def acceptanceSection = section("hadoop") 36 | 37 | override def map(fs: =>Fragments) = super.map(fs).insert(acceptanceSection).add(acceptanceSection) 38 | } 39 | 40 | /** 41 | * A trait for simple jobs running on the NICTA cluster 42 | */ 43 | trait NictaSimpleJobs extends NictaHadoop with SimpleJobs -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/collection/Maps.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package collection 19 | 20 | import scala.collection._ 21 | 22 | /** 23 | * This trait provides utility methods on maps, especially mutable maps 24 | */ 25 | private[scoobi] 26 | trait Maps { 27 | 28 | implicit def extendMutableMap[K, V](map: mutable.Map[K, V]) = new ExtendedMutableMap[K, V](map) 29 | class ExtendedMutableMap[K, V](map: mutable.Map[K, V]) { 30 | /** 31 | * update a mutable map with new keys and values existing in another map 32 | * @return the original mutable map 33 | */ 34 | def updateWith(other: Map[K, V])(update: PartialFunction[(K, V), (K, V)]) = { 35 | (other -- map.keys) foreach { kv => 36 | if (update.isDefinedAt(kv)) { 37 | map += update.apply(kv) 38 | } 39 | } 40 | map 41 | } 42 | } 43 | } 44 | 45 | object Maps extends Maps 46 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/PositionedBorderPanel.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package gui 17 | 18 | import swing.{Component, BorderPanel} 19 | 20 | case class PositionedBorderPanel(center: Component = NoComponent, 21 | north: Component = NoComponent, 22 | south: Component = NoComponent, 23 | east: Component = NoComponent, 24 | west: Component = NoComponent) extends BorderPanel { 25 | import BorderPanel._ 26 | 27 | if (center != NoComponent) layout(center) = Position.Center 28 | if (north != NoComponent) layout(north) = Position.North 29 | if (south != NoComponent) layout(south) = Position.South 30 | if (east != NoComponent) layout(east) = Position.East 31 | if (west != NoComponent) layout(west) = Position.West 32 | } 33 | 34 | object NoComponent extends Component -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/acceptance/WordCountSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package acceptance 18 | 19 | import Scoobi._ 20 | import testing.mutable.NictaSimpleJobs 21 | import core.Reduction.{Reduction => R} 22 | 23 | class WordCountSpec extends NictaSimpleJobs { 24 | 25 | "Counting words frequencies must return the frequency for each word" >> { implicit sc: SC => 26 | 27 | val frequencies = 28 | DList(repeat("hello" -> 3, "world" -> 4, "universe" -> 2):_*). 29 | mapFlatten(_.split(" ")).map((_, 1)). 30 | groupByKey. 31 | filter { case (word, n) => word.length < 6 }. 32 | combine(R.Sum.int) 33 | 34 | frequencies.run.sorted must_== Seq(("hello", 3), ("world", 4)) 35 | 36 | } 37 | /** @return a Seq of strings where each key has been duplicated a number of times indicated by the value */ 38 | def repeat(m: (String, Int)*): Seq[String] = m.flatMap { case (k, v) => Seq.fill(v)(k) } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /bin/ci-repl-snapshot: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Builds the scoobi REPL distribution (cdh5, cdh3 and default versions) and uploads each tarball to s3://ambiata-dist/scoobi-repl/ 3 | JVM_OPTS="-Dfile.encoding=UTF8 -XX:MaxPermSize=512m -Xms512m -Xmx2g -XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC"; export JVM_OPTS 4 | 5 | ./sbt -Dsbt.log.noformat=true ';set version <<= version(_.replace("SNAPSHOT", "cdh5-SNAPSHOT")); clean; update; compile; repl:dist' 6 | 7 | tarball=`basename target/scoobi-repl-*.tar.gz` 8 | $S3CMD_HOME/s3cmd --access_key=$AWS_ACCESS_KEY --secret_key=$AWS_SECRET_KEY sync target/$tarball s3://ambiata-dist 9 | $S3CMD_HOME/s3cmd --access_key=$AWS_ACCESS_KEY --secret_key=$AWS_SECRET_KEY mv s3://ambiata-dist/$tarball s3://ambiata-dist/scoobi-repl/scoobi-repl-cdh5.tgz 2>&1 10 | rm target/scoobi-repl-*.tar.gz 11 | 12 | ./sbt -Dsbt.log.noformat=true ';set version <<= version(_.replace("SNAPSHOT", "cdh3-SNAPSHOT")); clean; update; compile; repl:dist' 13 | 14 | tarball=`basename target/scoobi-repl-*.tar.gz` 15 | $S3CMD_HOME/s3cmd --access_key=$AWS_ACCESS_KEY
--secret_key=$AWS_SECRET_KEY sync target/$tarball s3://ambiata-dist 16 | $S3CMD_HOME/s3cmd --access_key=$AWS_ACCESS_KEY --secret_key=$AWS_SECRET_KEY mv s3://ambiata-dist/$tarball s3://ambiata-dist/scoobi-repl/scoobi-repl-cdh3.tgz 2>&1 17 | rm target/scoobi-repl-*.tar.gz 18 | 19 | ./sbt -Dsbt.log.noformat=true ';clean; update; compile; repl:dist' 20 | 21 | tarball=`basename target/scoobi-repl-*.tar.gz` 22 | $S3CMD_HOME/s3cmd --access_key=$AWS_ACCESS_KEY --secret_key=$AWS_SECRET_KEY sync target/$tarball s3://ambiata-dist 23 | $S3CMD_HOME/s3cmd --access_key=$AWS_ACCESS_KEY --secret_key=$AWS_SECRET_KEY mv s3://ambiata-dist/$tarball s3://ambiata-dist/scoobi-repl/scoobi-repl.tgz 2>&1 24 | rm target/scoobi-repl-*.tar.gz -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/BackgroundAction.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package gui 17 | 18 | import reactive._ 19 | import swing.SwingWorker 20 | 21 | class BackgroundAction[T](action: =>T) { 22 | 23 | def inBackground(implicit progress: EventSource[ActionInProgress] = new EventSource[ActionInProgress]()): EventStream[T] = { 24 | val result: EventSource[T] = new EventSource 25 | val worker = new SwingWorker { 26 | def act() { 27 | try { 28 | progress.fire(Started()) 29 | result.fire(action) 30 | } finally { progress.fire(Finished()) } 31 | } 32 | } 33 | worker.start() 34 | result 35 | } 36 | } 37 | 38 | object BackgroundAction { 39 | implicit def actionToBackgroundAction[T](action: =>T) = new BackgroundAction(action) 40 | } 41 | 42 | sealed trait ActionInProgress 43 | case class Started() extends ActionInProgress 44 | case class Finished() extends ActionInProgress 45 | -------------------------------------------------------------------------------- /notes/0.2.0.markdown: -------------------------------------------------------------------------------- 1 | ### New features 2 | 3 | * [Java bindings](http://nicta.github.com/scoobi/java/master/index.html) 4 | * Relational functionality: [`join`](http://nicta.github.com/scoobi/master/index.html#com.nicta.scoobi.lib.Join$), [`joinOn`](http://nicta.github.com/scoobi/master/index.html#com.nicta.scoobi.lib.Join$), [`coGroup`](http://nicta.github.com/scoobi/master/index.html#com.nicta.scoobi.lib.CoGroup$) and [`coGroupOn`](http://nicta.github.com/scoobi/master/index.html#com.nicta.scoobi.lib.CoGroup$) 5 | * New [`DList`](http://nicta.github.com/scoobi/master/index.html#com.nicta.scoobi.DList) methods: 6 | * `parallelDo` - a sledge-hammer for when `flatMap` isn't enough 7 | * `collect` - applies a partial function to each element 8 | * `by` - create a keyed `DList` for join and co-group operations 9 | * [`WireFormat`](http://nicta.github.com/scoobi/master/index.html#com.nicta.scoobi.WireFormat) helpers support for algebraic data types (ADTs): 10 | * `mkAbstractWireFormat` 11 | * 
`mkCaseWireFormat` (formerly just `mkWireFormat`) 12 | * `mkObjectWireFormat` 13 | Read more on the [wiki page for serialization](https://github.com/NICTA/scoobi/wiki/Serialization) 14 | * Migration to new Hadoop API (i.e. use of *context* objects) 15 | * [*Shortest path* example](https://github.com/NICTA/scoobi/blob/master/examples/shortestPath/src/main/scala/Graph.scala) 16 | 17 | ### Bug fixes 18 | 19 | * Checking all inputs exist before running a job 20 | * Bug fixes in *word count* example 21 | * Typos in documentation 22 | * Speed ups and support for all datatypes in ClassBuilder 23 | * Fix for reference counting intermediate data 24 | 25 | ------ 26 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/io/thrift/ThriftSerialiser.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi.io.thrift 17 | 18 | import org.apache.thrift.{TDeserializer, TSerializer} 19 | import org.apache.thrift.protocol.TCompactProtocol 20 | 21 | /** 22 | * Util for converting a `ThriftLike` object to and from bytes. 23 | * 24 | * WARNING: This class is _not_ threadsafe and should be used with extreme caution!
25 | * 26 | * https://issues.apache.org/jira/browse/THRIFT-2218 27 | */ 28 | case class ThriftSerialiser() { 29 | 30 | val serialiser = new TSerializer(new TCompactProtocol.Factory) 31 | val deserialiser = new TDeserializer(new TCompactProtocol.Factory) 32 | 33 | def toBytes[A](a: A)(implicit ev: A <:< ThriftLike): Array[Byte] = 34 | serialiser.serialize(ev(a)) 35 | 36 | def fromBytes[A](empty: A, bytes: Array[Byte])(implicit ev: A <:< ThriftLike): A = { 37 | val e = ev(empty).deepCopy 38 | e.clear() 39 | deserialiser.deserialize(e, bytes) 40 | e.asInstanceOf[A] 41 | } 42 | } -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/rtt/TaggedValue.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package rtt 19 | 20 | import org.apache.hadoop.io.Writable 21 | import core._ 22 | import impl.ScoobiConfiguration 23 | import org.apache.hadoop.conf.Configuration 24 | 25 | /** 26 | * A tagged value for Hadoop values. Specifically this will be a V2 type so must 27 | * implement the Writable interface. 28 | * 29 | * Before using a TaggedValue the appropriate tag must be set. 
By default, it is 0 30 | */ 31 | trait TaggedValue extends Tagged with Writable 32 | 33 | /** Companion object for dynamically constructing a subclass of TaggedValue. */ 34 | object TaggedValue { 35 | def apply(name: String, tags: Map[Int, Tuple1[WireReaderWriter]], classLoader: ClassLoader, configuration: Configuration): RuntimeClass = 36 | MetadataClassBuilder[MetadataTaggedValue](name, tags, classLoader, configuration).toRuntimeClass 37 | } 38 | 39 | abstract class MetadataTaggedValue extends TaggedValue with MetadataTaggedWritable 40 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/core/DObject.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package core 18 | 19 | /** 20 | * A wrapper around an object that is part of the graph of a distributed computation 21 | */ 22 | trait DObject[A] extends Persistent[A] { 23 | type T = DObject[A] 24 | type C <: ValueNode 25 | 26 | implicit def wf: WireFormat[A] = getComp.wf.asInstanceOf[WireFormat[A]] 27 | 28 | /** Create a new distributed object by applying a function to this distributed object */ 29 | def map[B : WireFormat](f: A => B): DObject[B] 30 | 31 | /** 32 | * Create a new distributed list by replicating the value of this distributed object 33 | * to every element within the provided distributed list 34 | */ 35 | def join[B : WireFormat](list: DList[B]): DList[(A, B)] 36 | def zip[B : WireFormat](o: DObject[B]): DObject[(A, B)] 37 | 38 | def toSingleElementDList: DList[A] 39 | def toDList[B](implicit ev: A <:< Iterable[B], wfb: WireFormat[B]): DList[B] = toSingleElementDList.mapFlatten(x => x) 40 | } 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/text/Showx.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package text 19 | 20 | import scalaz.Show 21 | 22 | /** 23 | * Extension for the Show functionality of Scalaz for sequences where the elements share a same Show instance 24 | * 25 | * Note: there are ways to transform this code so that more specific Show instances are picked up for each element in the list 26 | * (see StackOverflow and Shapeless) 27 | */ 28 | object Showx { 29 | /** @return the shown value wrapped in parentheses */ 30 | def parens[T : Show](t: T) = Seq(t).showString 31 | 32 | implicit def showSeq[T : Show](seq: Seq[T]): ShowSeq[T] = new ShowSeq(seq) 33 | class ShowSeq[T : Show](seq: Seq[T]) { 34 | /** @return the shown elements, comma-separated, between parentheses */ 35 | def showString: String = showString() 36 | /** @return the shown elements, joined with the separator, between parentheses */ 37 | def showString(separator: String = ","): String = showString("(", separator, ")") 38 | /** @return the shown elements, comma-separated, between the given start and end delimiters */ 39 | def showString(start: String, end: String): String = showString(start, ",", end) 40 | /** @return each element rendered with its Show instance and joined with mkString(start, separator, end) */ 41 | def showString(start: String, separator: String, end: String): String = seq.map(implicitly[Show[T]].show).mkString(start, separator, end) 42 | } 43 | } -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/impl/util/SerialiserSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License.
15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package util 19 | 20 | import testing.mutable.UnitSpecification 21 | import org.apache.hadoop.conf.Configuration 22 | import java.io.{OutputStream, InputStream, ByteArrayInputStream, ByteArrayOutputStream} 23 | 24 | class SerialiserSpec extends UnitSpecification { 25 | "it is possible to serialize a configuration object without its classloader" >> { 26 | serialise(new Configuration).toString must not contain("classLoader") 27 | } 28 | 29 | "a serialised object must not failed to be serialised even with unicode characters" >> { 30 | deserialise(serialise("abc\\u001")) === "abc\\u001" 31 | } 32 | 33 | def serialise(a: Any): ByteArrayOutputStream = { 34 | val out = new ByteArrayOutputStream 35 | Serialiser.serialise(a, out) 36 | out 37 | } 38 | 39 | def deserialise(in: ByteArrayOutputStream): Any = { 40 | val input = new ByteArrayInputStream(in.toByteArray) 41 | Serialiser.deserialise(input) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/guide/ScoobiPage.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package guide 18 | 19 | import org.specs2.Specification 20 | import org.specs2.specification._ 21 | import org.specs2.specification.Snippets 22 | 23 | /** 24 | * base class for creating Scoobi user guide pages. 25 | * 26 | * If the text contains "${VERSION}", each occurrence will be replaced by the current Scoobi version as defined in the build.sbt file 27 | * If the text contains "${BRANCH}", each occurrence will be replaced by either the official tag or master if the version is a SNAPSHOT one 28 | */ 29 | trait ScoobiPage extends Specification with ScoobiVariables with Snippets { 30 | override def map(fs: =>Fragments) = 31 | Fragments.create(fs.fragments.map { 32 | case start: SpecStart if isIndex(start) => start.urlIs("index.html") 33 | case start: SpecStart => start.baseDirIs(s"./$GUIDE_DIR") 34 | case other => other 35 | }:_*) 36 | 37 | private def isIndex(start: SpecStart) = start.specName.javaClassName endsWith "Index" 38 | } 39 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/acceptance/TextFileSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package acceptance 18 | 19 | import com.nicta.scoobi.testing.{TestFiles, TempFiles} 20 | import testing.mutable.NictaSimpleJobs 21 | import Scoobi._ 22 | import TestFiles._ 23 | 24 | class TextFileSpec extends NictaSimpleJobs { 25 | "A text file that is saved with overwrite = true can be rewritten" >> { implicit sc: SC => 26 | val list = DList(1, 2, 3) 27 | 28 | val path1 = path(TempFiles.createTempFilePath("path")) 29 | val l1 = list.toTextFile(path1, overwrite = false) 30 | l1.run // run once 31 | 32 | // an exception must be thrown the 2nd time 33 | val l2 = DList(1, 2, 3).toTextFile(path1, overwrite = false) 34 | l2.run must throwAn[Exception] 35 | 36 | val path2 = path(TempFiles.createTempFilePath("path")) 37 | val l3 = list.toTextFile(path2, overwrite = true) 38 | l3.run // run once 39 | 40 | // an exception must not be thrown the 2nd time 41 | val l4 = DList(1, 2, 3).toTextFile(path2, overwrite = true) 42 | l4.run must not(throwAn[Exception]) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/time/Timestamped.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package time 17 | 18 | import scalaz._ 19 | import Scalaz._ 20 | import scalazx.Reducer._ 21 | import times._ 22 | 23 | /** 24 | * Useful trait for anything having a timestamp, like a log record or a measure 25 | */ 26 | trait Timestamped { 27 | val startTime: Long 28 | def ddMMhhmm = epochtime(startTime).ddMMhhmm 29 | def hhmm = epochtime(startTime).hhmm 30 | def hhmmssS = epochtime(startTime).hhmmssS 31 | def mmssS = epochtime(startTime).mmssS 32 | 33 | /** @return true if the time range is not defined or if the start time is in the time range */ 34 | def isInTimeRange(timeRange: DaytimeRange) = timeRange.contains(epochtime(startTime)) 35 | 36 | // show the record timestamp 37 | def atTime = " @"+startTime 38 | def hhmmAtTime = hhmm+atTime 39 | } 40 | 41 | object Timestamped { 42 | /** timestamps define a natural order */ 43 | val startTimeOrder = order((_:Timestamped).startTime) 44 | /** when the order should not change */ 45 | val noOrder = order((t:Timestamped) => 0L) 46 | 47 | } -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/impl/util/DistCacheSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package util 19 | 20 | import org.specs2.specification._ 21 | import script.SpecificationLike 22 | import org.apache.hadoop.conf.Configuration 23 | import com.nicta.scoobi.testing.UnitSpecification 24 | 25 | class DistCacheSpec extends UnitSpecification with SpecificationLike with Groups { def is = s2""" 26 | 27 | The DistCache object can be object to serialise objects and push them to Hadoop's distributed cache 28 | 29 | + pushObject/pullObject must bring back the same object 30 | + when an object is push, we try to deserialise it right away and throw an exception if this is not possible 31 | """ 32 | 33 | "dist cache" - new group { 34 | eg := { 35 | val configuration = new Configuration 36 | DistCache.pushObject(configuration, "hello world", "tag1") 37 | DistCache.pullObject[String](configuration, "tag1") must beSome("hello world") 38 | } 39 | 40 | eg := { 41 | DistCache.pushObject(new Configuration, getClass.getClassLoader, "tag1") must throwAn[Exception] 42 | } 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/acceptance/BoundedFilterSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package acceptance 18 | 19 | import Scoobi._ 20 | import testing.mutable.NictaSimpleJobs 21 | 22 | class BoundedFilterSpec extends NictaSimpleJobs { 23 | 24 | "Filtering with DObjects" >> { 25 | "Filtering with lower and upper bounds removes all values outside a range" >> { implicit c: SC => 26 | 27 | val xs = DList(1, 2, 3, 4) 28 | 29 | val lower = DObject(1) 30 | val upper = DObject(4) 31 | 32 | val ys = ((lower, upper) join xs).filter { case ((l, u), x) => x > l && x < u }.values 33 | val total = ys.sum 34 | 35 | total.run === 5 36 | } 37 | 38 | "Filtering by average removes all values less than the average" >> { implicit c: SC => 39 | 40 | val ints = Seq(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) 41 | 42 | val xs = ints.toDList 43 | val average = (xs.sum, xs.size) map { case (t, s) => t / s } 44 | val bigger = (average join xs) filter { case (a, x) => x > a } 45 | bigger.values.run.sorted must_== ints.filter(_ > (ints.sum / ints.size)) 46 | 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/application/Cluster.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package application 18 | 19 | import impl.ScoobiConfiguration 20 | import ScoobiConfiguration._ 21 | import com.nicta.scoobi.impl.util.Compatibility 22 | 23 | /** 24 | * Definition of the Cluster addresses: FileSystem + JobTracker 25 | */ 26 | trait Cluster { 27 | /** @return the filesystem address */ 28 | def fs: String 29 | 30 | /** @return the jobtracker address */ 31 | def jobTracker: String 32 | 33 | /** @return the framework name: yarn, classic or local. This is only relevant for CDH5 */ 34 | def frameworkName: String 35 | } 36 | 37 | 38 | /** 39 | * Implementation of the Cluster trait taking the configuration from a ScoobiConfiguration object 40 | */ 41 | trait ClusterConfiguration extends Cluster { 42 | 43 | def configuration: com.nicta.scoobi.core.ScoobiConfiguration 44 | 45 | def fs = configuration.get(Compatibility.defaultFSKeyName, "file:///") 46 | def jobTracker = configuration.get("mapred.job.tracker", "local") 47 | def frameworkName = configuration.get("mapreduce.framework.name", "local") 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/testing/SimpleJobs.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package testing 18 | 19 | import application._ 20 | import core._ 21 | 22 | /** 23 | * This trait helps in the creation of DLists and Scoobi jobs where the user doesn't have to track the creation of files. 24 | * All data is written to temporary files and is deleted after usage. 25 | */ 26 | trait SimpleJobs extends Persist { outer => 27 | 28 | /** 29 | * @return a simple job from a list of strings (for the input file) and the current configuration 30 | */ 31 | def fromInput(ts: String*)(implicit c: ScoobiConfiguration) = 32 | InputStringTestFile(ts).lines 33 | 34 | /** 35 | * @return a DList input keeping track of its temporary input file 36 | */ 37 | def fromDelimitedInput(ts: String*)(implicit c: ScoobiConfiguration) = 38 | new InputTestFile[List[String]](ts, mapping = (_:String).split(",").toList).lines 39 | 40 | def fromKeyValues(ts: String*)(implicit c: ScoobiConfiguration): DList[(String, String)] = 41 | fromDelimitedInput(ts:_*).map { case k :: v :: _ => (k, v); case line => ("error", "could not split line "+line) } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/impl/control/FunctionsSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package control 19 | 20 | import Functions._ 21 | import testing.mutable.UnitSpecification 22 | 23 | class FunctionsSpec extends UnitSpecification { 24 | 25 | "functions can be or-ed with ||" >> { 26 | val f1: String => Boolean = (_:String).length < 3 27 | val f2: String => Boolean = (_:String).length < 5 28 | 29 | (f1 || f2)("abcdefg") must beFalse 30 | (f1 || f2)("abc") must beTrue 31 | (f1 || f2)("abcd") must beTrue 32 | (f2 || f1)("ab") must beTrue 33 | } 34 | "functions can be and-ed with &&" >> { 35 | val f1: String => Boolean = (_:String).length < 3 36 | val f2: String => Boolean = (_:String).length < 5 37 | (f1 && f2)("abcdefg") must beFalse 38 | (f1 && f2)("abc") must beFalse 39 | (f1 && f2)("abcd") must beFalse 40 | (f2 && f1)("ab") must beTrue 41 | } 42 | "functions can be negated with !" >> { 43 | val f1: String => Boolean = (_:String).length < 3 44 | 45 | (!f1)("abcdefg") must beTrue 46 | (!f1)("ab") must beFalse 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/OpenFileMenuItem.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package gui 17 | 18 | import swing._ 19 | import javax.swing.KeyStroke 20 | import java.awt.event.{ActionEvent, KeyEvent} 21 | import Images._ 22 | import java.io.File 23 | import reactive._ 24 | import swing.FileChooser.Result 25 | import java.awt.Component 26 | import reactive.EventStreamSourceProxy 27 | 28 | case class OpenFileMenuItem(start: String, label: String = "Open") extends MenuItem(label) with EventStreamSourceProxy[File] { outer => 29 | override def self: Component with EventStream[File] = this.asInstanceOf[Component with EventStream[File]] 30 | 31 | val fileChooser = new FileChooser(new java.io.File(start)) 32 | 33 | action = new Action(label) { 34 | icon = getIcon("folder-icon.png") 35 | mnemonic = KeyEvent.VK_O 36 | accelerator = Some(KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.ALT_MASK)) 37 | 38 | def apply() { 39 | fileChooser.showOpenDialog(outer) match { 40 | case Result.Approve => source.fire(fileChooser.selectedFile) 41 | case _ => () 42 | } 43 | } 44 | } 45 | } 46 | 47 | 48 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/application/LocalHadoop.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package application 18 | 19 | import core._ 20 | import Mode._ 21 | 22 | /** 23 | * Execute Hadoop code locally 24 | */ 25 | trait LocalHadoop extends InMemoryHadoop { 26 | 27 | /** execute some code locally, possibly showing execution times */ 28 | def onLocal[T](t: =>T)(implicit configuration: ScoobiConfiguration) = 29 | showTime(executeOnLocal(t))(displayTime("Local execution time")) 30 | 31 | /** execute some code locally */ 32 | def executeOnLocal[T](t: =>T)(implicit configuration: ScoobiConfiguration) = { 33 | setLogFactory() 34 | configureForLocal 35 | runOnLocal(t) 36 | } 37 | 38 | /** 39 | * @return the result of the local run 40 | */ 41 | def runOnLocal[T](t: =>T) = t 42 | 43 | /** 44 | * @return a configuration with local setup 45 | */ 46 | def configureForLocal(implicit configuration: ScoobiConfiguration): ScoobiConfiguration = { 47 | configureArguments 48 | configuration.modeIs(Local) 49 | if (!configuration.jobName.isDefined) configuration.jobNameIs(getClass.getSimpleName) 50 | configuration.setAsLocal 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/core/CheckpointSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package core 18 | 19 | import testing.TestFiles 20 | import testing.mutable.UnitSpecification 21 | import org.specs2.mock.Mockito 22 | import impl.ScoobiConfigurationImpl 23 | import org.apache.hadoop.fs.{FileStatus, Path, FileSystem} 24 | import io.avro.AvroOutput._ 25 | 26 | class CheckpointSpec extends UnitSpecification with Mockito { 27 | "A checkpoint exists on a Sink if the sink is a checkpoint and if there are files with previous results in the output directory" >> { 28 | val mockFs = mock[FileSystem] 29 | implicit val configuration = mock[ScoobiConfiguration] 30 | configuration.fileSystem returns mockFs 31 | 32 | val checkpoint = avroSink[Int](TestFiles.createTempFile("test")(new ScoobiConfigurationImpl).getPath, checkpoint = true) 33 | 34 | // the output path exists 35 | mockFs.exists(any[Path]) returns true 36 | // there are files in the output directory 37 | val status = mock[FileStatus] 38 | mockFs.listStatus(any[Path]) returns Array(status) 39 | status.getPath returns new Path("_SUCCESS") 40 | 41 | checkpoint.checkpointExists(configuration) must beTrue 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/rtt/TaggedPartitioner.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package rtt 19 | 20 | import org.apache.hadoop.mapreduce.Partitioner 21 | import core._ 22 | import impl.ScoobiConfiguration 23 | import org.apache.hadoop.conf.Configuration 24 | 25 | /** Custom partitioner for tagged key-values. */ 26 | trait TaggedPartitioner extends Partitioner[TaggedKey, TaggedValue] 27 | 28 | /** Companion object for dynamically constructing a subclass of TaggedPartitioner. */ 29 | object TaggedPartitioner { 30 | def apply(name: String, tags: Map[Int, (WireReaderWriter, KeyGrouping)], classLoader: ClassLoader, configuration: Configuration): RuntimeClass = 31 | MetadataClassBuilder[MetadataTaggedPartitioner](name, tags, classLoader, configuration).toRuntimeClass 32 | } 33 | 34 | /** 35 | * This partitioner uses the grouping of the current key tag and partitions based on the key value 36 | */ 37 | abstract class MetadataTaggedPartitioner extends TaggedPartitioner with MetadataWireFormats with MetadataGroupings { 38 | def getPartition(key: TaggedKey, value: TaggedValue, numPartitions: Int): Int = { 39 | grouping(key.tag).partition(key.get(key.tag), numPartitions) 40 | } 41 | } 42 | 43 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/units/MeasureUnit.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package units 17 | /** 18 | * This trait represents any Unit type 19 | * 20 | * It can't be called Unit because this conflicts with the Unit type in scala 21 | * 22 | * A MeasureUnit can be converted to another Measure unit assuming that: 23 | * 24 | * - there is one MeasureUnit for "base" values, for example Millis 25 | * - there is a method to convert a "base" value to a "higher" unit, for example Seconds.fromBase converts millis into seconds (toBase is the inverse) 26 | */ 27 | trait MeasureUnit { 28 | 29 | /** a MeasureUnit can be displayed */ 30 | def show: String = toString 31 | 32 | def fromBase = (value: Long) => value / factor 33 | def toBase = (value: Long) => value * factor 34 | 35 | /** 36 | * each unit must define a conversion factor with the unit just below it. 
For example the factor for Minutes is 60 37 | */ 38 | def factor: Long 39 | 40 | /** a function creating a value can be lifted to a function creating a quantity, by providing a name for the quantity */ 41 | def createQuantity[T](name: String, f: T => Long): T => Quantity = (t: T) => new Quantity(name, f(t), this) 42 | } 43 | -------------------------------------------------------------------------------- /src/main/ls/0.7.0.json: -------------------------------------------------------------------------------- 1 | { 2 | "organization" : "com.nicta", 3 | "name" : "scoobi", 4 | "version" : "0.7.0", 5 | "description" : "scoobi", 6 | "site" : "", 7 | "tags" : [ ], 8 | "docs" : "", 9 | "resolvers" : [ "https://oss.sonatype.org/content/repositories/releases" ], 10 | "dependencies" : [ { 11 | "organization" : "org.apache.hadoop", 12 | "name" : "hadoop-client", 13 | "version" : "2.0.0-mr1-cdh4.0.1" 14 | }, { 15 | "organization" : "org.apache.hadoop", 16 | "name" : "hadoop-core", 17 | "version" : "2.0.0-mr1-cdh4.0.1" 18 | }, { 19 | "organization" : "javassist", 20 | "name" : "javassist", 21 | "version" : "3.12.1.GA" 22 | }, { 23 | "organization" : "org.apache.avro", 24 | "name" : "avro-mapred", 25 | "version" : "1.7.3.1" 26 | }, { 27 | "organization" : "org.apache.avro", 28 | "name" : "avro", 29 | "version" : "1.7.3.1" 30 | }, { 31 | "organization" : "com.thoughtworks.xstream", 32 | "name" : "xstream", 33 | "version" : "1.4.3" 34 | }, { 35 | "organization" : "com.googlecode.kiama", 36 | "name" : "kiama", 37 | "version" : "1.4.0" 38 | }, { 39 | "organization" : "com.github.mdr", 40 | "name" : "ascii-graphs", 41 | "version" : "0.0.2" 42 | }, { 43 | "organization" : "org.scalaz", 44 | "name" : "scalaz-core", 45 | "version" : "7.0.0-M7" 46 | }, { 47 | "organization" : "org.scalaz", 48 | "name" : "scalaz-concurrent", 49 | "version" : "7.0.0-M7" 50 | }, { 51 | "organization" : "org.specs2", 52 | "name" : "specs2", 53 | "version" : "1.12.3" 54 | }, { 55 | "organization" : 
"com.chuusai", 56 | "name" : "shapeless", 57 | "version" : "1.2.2" 58 | }, { 59 | "organization" : "org.mockito", 60 | "name" : "mockito-all", 61 | "version" : "1.9.0" 62 | } ], 63 | "scalas" : [ "2.9.2" ], 64 | "licenses" : [ ], 65 | "sbt" : false 66 | } -------------------------------------------------------------------------------- /src/main/ls/0.8.4-cdh4.json: -------------------------------------------------------------------------------- 1 | { 2 | "organization" : "com.nicta", 3 | "name" : "scoobi", 4 | "version" : "0.8.4-cdh4", 5 | "description" : "scoobi", 6 | "site" : "", 7 | "tags" : [ ], 8 | "docs" : "", 9 | "resolvers" : [ "https://oss.sonatype.org/content/repositories/releases" ], 10 | "dependencies" : [ { 11 | "organization" : "org.apache.avro", 12 | "name" : "avro", 13 | "version" : "1.7.4" 14 | }, { 15 | "organization" : "com.thoughtworks.xstream", 16 | "name" : "xstream", 17 | "version" : "1.4.4" 18 | }, { 19 | "organization" : "javassist", 20 | "name" : "javassist", 21 | "version" : "3.12.1.GA" 22 | }, { 23 | "organization" : "com.googlecode.kiama", 24 | "name" : "kiama", 25 | "version" : "1.5.2" 26 | }, { 27 | "organization" : "com.chuusai", 28 | "name" : "shapeless_2.10.3", 29 | "version" : "2.0.0-M1" 30 | }, { 31 | "organization" : "com.nicta", 32 | "name" : "scoobi-compatibility-cdh4", 33 | "version" : "1.0.1" 34 | }, { 35 | "organization" : "org.scalaz", 36 | "name" : "scalaz-core", 37 | "version" : "7.0.6" 38 | }, { 39 | "organization" : "org.scalaz", 40 | "name" : "scalaz-iteratee", 41 | "version" : "7.0.6" 42 | }, { 43 | "organization" : "org.scalaz", 44 | "name" : "scalaz-concurrent", 45 | "version" : "7.0.6" 46 | }, { 47 | "organization" : "org.scalaz", 48 | "name" : "scalaz-scalacheck-binding", 49 | "version" : "7.0.6" 50 | }, { 51 | "organization" : "org.scalaz", 52 | "name" : "scalaz-typelevel", 53 | "version" : "7.0.6" 54 | }, { 55 | "organization" : "org.scalaz", 56 | "name" : "scalaz-xml", 57 | "version" : "7.0.6" 58 | }, { 59 | 
"organization" : "org.specs2", 60 | "name" : "specs2-core", 61 | "version" : "2.3.10" 62 | } ], 63 | "scalas" : [ "2.10.3" ], 64 | "licenses" : [ ], 65 | "sbt" : false 66 | } -------------------------------------------------------------------------------- /src/main/ls/0.8.5-cdh4.json: -------------------------------------------------------------------------------- 1 | { 2 | "organization" : "com.nicta", 3 | "name" : "scoobi", 4 | "version" : "0.8.5-cdh4", 5 | "description" : "scoobi", 6 | "site" : "", 7 | "tags" : [ ], 8 | "docs" : "", 9 | "resolvers" : [ "https://oss.sonatype.org/content/repositories/releases" ], 10 | "dependencies" : [ { 11 | "organization" : "org.apache.avro", 12 | "name" : "avro", 13 | "version" : "1.7.4" 14 | }, { 15 | "organization" : "com.thoughtworks.xstream", 16 | "name" : "xstream", 17 | "version" : "1.4.4" 18 | }, { 19 | "organization" : "javassist", 20 | "name" : "javassist", 21 | "version" : "3.12.1.GA" 22 | }, { 23 | "organization" : "com.googlecode.kiama", 24 | "name" : "kiama", 25 | "version" : "1.6.0" 26 | }, { 27 | "organization" : "com.chuusai", 28 | "name" : "shapeless_2.10.3", 29 | "version" : "2.0.0-M1" 30 | }, { 31 | "organization" : "com.nicta", 32 | "name" : "scoobi-compatibility-cdh4", 33 | "version" : "1.0.2" 34 | }, { 35 | "organization" : "org.scalaz", 36 | "name" : "scalaz-core", 37 | "version" : "7.0.6" 38 | }, { 39 | "organization" : "org.scalaz", 40 | "name" : "scalaz-iteratee", 41 | "version" : "7.0.6" 42 | }, { 43 | "organization" : "org.scalaz", 44 | "name" : "scalaz-concurrent", 45 | "version" : "7.0.6" 46 | }, { 47 | "organization" : "org.scalaz", 48 | "name" : "scalaz-scalacheck-binding", 49 | "version" : "7.0.6" 50 | }, { 51 | "organization" : "org.scalaz", 52 | "name" : "scalaz-typelevel", 53 | "version" : "7.0.6" 54 | }, { 55 | "organization" : "org.scalaz", 56 | "name" : "scalaz-xml", 57 | "version" : "7.0.6" 58 | }, { 59 | "organization" : "org.specs2", 60 | "name" : "specs2-core", 61 | "version" : 
"2.3.12"
  } ],
  "scalas" : [ "2.10.3" ],
  "licenses" : [ ],
  "sbt" : false
}
--------------------------------------------------------------------------------
/examples/scoobding/src/main/scala/application/measure/RangeKey.scala:
--------------------------------------------------------------------------------
/**
 * Copyright 2011,2012 National ICT Australia Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package measure

import scala.math._
import units.{EmptyUnit, MeasureUnit}
import scalaz.{Order, Semigroup}
import scalaz.Ordering.{GT, LT, EQ}

/**
 * A key made of 2 bounds, displayed as "inf < <= sup"
 */
case class RangeKey(inf: Long, sup: Long) {

  override def toString = inf + " < <= " + sup

  /** @return the range going from the smallest inf to the biggest sup of the 2 ranges */
  def merge(o: RangeKey) = {
    val lowest  = min(inf, o.inf)
    val highest = max(sup, o.sup)
    RangeKey(lowest, highest)
  }

  /** @return true if both bounds of the other range are within the bounds of this one (bounds included) */
  def contains(o: RangeKey) = (inf <= o.inf) && (o.sup <= sup)

}

object RangeKey {

  /**
   * @return the range of width `size` starting at the multiple of `size` obtained by
   *         integer division of `value`, with both bounds converted from the base unit by `unit`
   */
  def range(value: Long, size: Long, unit: MeasureUnit = EmptyUnit) = {
    val lower = (value / size) * size
    val upper = lower + size
    RangeKey(unit.fromBase(lower), unit.fromBase(upper))
  }


  /** 2 ranges are appended by merging them */
  implicit val rangeKeyIsSemigroup: Semigroup[RangeKey] = new Semigroup[RangeKey] {
    def append(r1: RangeKey, r2: =>RangeKey) = r1 merge r2
  }

  /**
   * 2 ranges are equal if one contains the other.
   * Otherwise the first is lower when its sup is not greater than the inf of the second, and greater in all other cases
   */
  implicit def rangeKeyOrder: Order[RangeKey] = new Order[RangeKey] {

    def order(x: RangeKey, y: RangeKey) = {
      val nested = x.contains(y) || y.contains(x)
      if (nested) EQ
      else if (x.sup <= y.inf) LT
      else GT
    }
  }
}

--------------------------------------------------------------------------------
/src/main/ls/0.6.0-cdh4.json:
--------------------------------------------------------------------------------
{
  "organization" : "com.nicta",
  "name" : "scoobi",
  "version" : "0.6.0-cdh4",
  "description" : "scoobi",
  "site" : "",
  "tags" : [ ],
  "docs" : "",
  "resolvers" : [ "https://oss.sonatype.org/content/repositories/releases" ],
  "dependencies" : [ {
    "organization" : "org.apache.hadoop",
    "name" : "hadoop-client",
    "version" : "2.0.0-mr1-cdh4.0.1"
  }, {
    "organization" : "org.apache.hadoop",
    "name" : "hadoop-core",
    "version" : "2.0.0-mr1-cdh4.0.1"
  }, {
    "organization" : "javassist",
    "name" : "javassist",
    "version" : "3.12.1.GA"
  }, {
    "organization" : "org.apache.avro",
    "name" : "avro-mapred",
    "version" : "1.7.3.1"
  }, {
    "organization" : "org.apache.avro",
    "name" : "avro",
    "version" : "1.7.3.1"
  }, {
    "organization" : "com.thoughtworks.xstream",
    "name" : "xstream",
    "version" : "1.4.3"
  }, {
    "organization" : "com.googlecode.kiama",
    "name" : "kiama",
    "version" : "1.4.0"
  }, {
    "organization" : "com.github.mdr",
    "name" : "ascii-graphs",
    "version" : "0.0.2"
  }, {
    "organization" : "org.scalaz",
    "name" : "scalaz-core",
    "version" : "7.0.0-M7"
  }, {
    "organization" : "org.scalaz",
    "name" : "scalaz-concurrent",
    "version" : "7.0.0-M7"
  }, {
    "organization" : "org.specs2",
    "name" : "specs2",
    "version" : "1.12.3"
  }, {
    "organization" : "com.chuusai",
    "name" : "shapeless",
    "version" : "1.2.2"
  }, {
    "organization" : "org.mockito",
    "name" : "mockito-all",
    "version" : "1.9.0"
  } ],
  "scalas" : [ "2.9.2" ],
  "licenses" : [ ],
  "sbt" : false
}
-------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/application/InMemoryModeSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package application 18 | 19 | import testing.SimpleJobs 20 | import testing.mutable.HadoopSpecification 21 | import Scoobi._ 22 | import impl.plan.comp._ 23 | import core.Reduction.{Reduction => R} 24 | 25 | class InMemoryModeSpec extends HadoopSpecification with SimpleJobs with CompNodeData { sequential 26 | 27 | "The in memory mode can execute DLists and DObjects with repeating shared computations".txt 28 | 29 | "Basic computations for DLists" >> { 30 | "ParallelDo" >> { implicit sc: ScoobiConfiguration => 31 | DList(1, 2, 3).map(_ + 1).run === Seq(2, 3, 4) 32 | } 33 | "Combine" >> { implicit sc: ScoobiConfiguration => 34 | DList((1, Seq(2, 3)), (3, Seq(4))).combine(R.Sum.int).run === Seq((1, 5), (3, 4)) 35 | } 36 | } 37 | 38 | "Random tests" >> { 39 | implicit val inMemoryConfiguration = configureForInMemory(configuration) 40 | "Computing a DList must never fail" >> prop { (list: DList[String]) => 41 | list.run must not(throwAn[Exception]) 42 | } 43 | "Computing a DObject must never fail" >> prop { (o: DObject[String]) => 44 | o.run must 
not(throwAn[Exception]) 45 | } 46 | } 47 | override def contexts = Seq(inMemory) 48 | } 49 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/Persister.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | 19 | import org.apache.commons.logging.LogFactory 20 | import exec.{HadoopMode, InMemoryMode} 21 | import core.{DObject, DList, Persistent} 22 | import plan.comp._ 23 | import core.Mode._ 24 | 25 | class Persister(sc: core.ScoobiConfiguration) { 26 | private implicit val configuration = sc 27 | private implicit lazy val logger = LogFactory.getLog("scoobi.Persister") 28 | 29 | private val inMemoryMode = InMemoryMode() 30 | private val hadoopMode = HadoopMode(sc) 31 | 32 | def persist[A](ps: Seq[Persistent[_]]) = { 33 | val asOne = Root(ps.map(_.getComp)) 34 | sc.mode match { 35 | case InMemory => inMemoryMode.execute(asOne) 36 | case Local | Cluster => hadoopMode .execute(asOne) 37 | } 38 | ps 39 | } 40 | 41 | def persist[A](list: DList[A]) = { 42 | sc.mode match { 43 | case InMemory => inMemoryMode.execute(list) 44 | case Local | Cluster => hadoopMode .execute(list) 45 | } 46 | list 47 | } 48 | 49 | def persist[A](o: DObject[A]): A = { 50 | sc.mode match 
{ 51 | case InMemory => inMemoryMode.execute(o).asInstanceOf[A] 52 | case Local | Cluster => hadoopMode .execute(o).asInstanceOf[A] 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/impl/mapreducer/VectorEmitterWriter.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package mapreducer 19 | 20 | import scala.collection.immutable.VectorBuilder 21 | import core._ 22 | import plan.comp.ParallelDo 23 | 24 | 25 | /** 26 | * In memory emitter writer saving the values to a Vector 27 | */ 28 | case class VectorEmitterWriter(context: InputOutputContext) extends EmitterWriter with InputOutputContextScoobiJobContext { 29 | private val vb = new VectorBuilder[Any] 30 | def write(v: Any) { vb += v } 31 | 32 | /** use this emitter to map a list of value with a parallelDo */ 33 | def map(environment: Any, mappedValues: Seq[Any], mapper: ParallelDo)(implicit configuration: ScoobiConfiguration) = { 34 | vb.clear 35 | mappedValues.foreach(v => mapper.map(environment, v, this)) 36 | try result 37 | finally vb.clear 38 | } 39 | 40 | // for testing only 41 | protected[scoobi] def result = vb.result 42 | 43 | } 44 | 45 | // used for testing only 46 | object VectorEmitterWriter { 47 | def create = new VectorEmitterWriter(null) { 48 | override def incrementCounter(groupName: String, name: String, increment: Long = 1) {} 49 | override def getCounter(groupName: String, name: String) = -1 50 | override def tick {} 51 | 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /notes/0.8.0.markdown: -------------------------------------------------------------------------------- 1 | Scoobi version for Hadoop2! 2 | 3 | ### Features 4 | 5 | * `scoobi-0.8.0.jar` can be used with Hadoop 2.2.0. 
If you want to go on using CDH4 you need to use `scoobi-0.8.0-cdh4.jar` 6 | * added the possibility to use counters to count the number of values per mapper or reducer (see `ScoobiConfiguration`) [#297](https://github.com/NICTA/scoobi/issues/297) 7 | * add wire format for fixed avro type [#301](https://github.com/NICTA/scoobi/issues/301), PR by Jan Prach 8 | * added methods to read and write "partitioned" files, for example files created in directories where the name is meaningful: `year=2014/month=02/day=01/part.txt`. When reading, a `DList[(String, String)]` is created where each value is a line in the file and the key is the file path. When writing, a function `K => String` can be provided in order to output the text files to specific directories based on each key 9 | 10 | ### Improvements 11 | 12 | * extracted an `InputsOutputs.fromSource` method to create a `DList[A]` from any `DataSource[_,_,A]` 13 | * removed the dependency on the `ascii-graph` library 14 | * updated to Scala 2.10.3 15 | * removed deprecated methods created in `0.7.0` 16 | 17 | ### Fixes 18 | 19 | * fixed the usage of globs for hadoop 2 [#305](https://github.com/NICTA/scoobi/issues/305) 20 | * fixed the setup for sequence files where previous files were not removed 21 | * fixed the persistence of a DObject when defined from just a value 22 | * In-memory mode does not keep track of hadoop counters [#306](https://github.com/NICTA/scoobi/issues/306), PR by Ilya Maykov 23 | * MR job numbering broken in HadoopMode for job plans with more than 1 layer [#308](https://github.com/NICTA/scoobi/issues/308), PR by Ilya Maykov 24 | * prevent extra delimiter on sequence types in listToDelimitedTextFile [#311](https://github.com/NICTA/scoobi/issues/311) 25 | * extracted the converter used in fromTextFileWithPaths to avoid duplication [#315](https://github.com/NICTA/scoobi/issues/315) 26 | 27 | ------ 28 | --------------------------------------------------------------------------------
/src/main/ls/0.9.1-cdh4.json: -------------------------------------------------------------------------------- 1 | { 2 | "organization" : "com.nicta", 3 | "name" : "scoobi", 4 | "version" : "0.9.1-cdh4-SNAPSHOT", 5 | "description" : "scoobi", 6 | "site" : "", 7 | "tags" : [ ], 8 | "docs" : "", 9 | "resolvers" : [ "https://oss.sonatype.org/content/repositories/releases" ], 10 | "dependencies" : [ { 11 | "organization" : "org.apache.avro", 12 | "name" : "avro", 13 | "version" : "1.7.4" 14 | }, { 15 | "organization" : "com.thoughtworks.xstream", 16 | "name" : "xstream", 17 | "version" : "1.4.4" 18 | }, { 19 | "organization" : "javassist", 20 | "name" : "javassist", 21 | "version" : "3.12.1.GA" 22 | }, { 23 | "organization" : "com.googlecode.kiama", 24 | "name" : "kiama", 25 | "version" : "1.6.0" 26 | }, { 27 | "organization" : "com.chuusai", 28 | "name" : "shapeless", 29 | "version" : "2.0.0" 30 | }, { 31 | "organization" : "com.nicta", 32 | "name" : "scoobi-compatibility-cdh4", 33 | "version" : "1.0.3" 34 | }, { 35 | "organization" : "org.apache.thrift", 36 | "name" : "libthrift", 37 | "version" : "0.9.1" 38 | }, { 39 | "organization" : "org.scalaz", 40 | "name" : "scalaz-core", 41 | "version" : "7.1.0" 42 | }, { 43 | "organization" : "org.scalaz", 44 | "name" : "scalaz-iteratee", 45 | "version" : "7.1.0" 46 | }, { 47 | "organization" : "org.scalaz", 48 | "name" : "scalaz-concurrent", 49 | "version" : "7.1.0" 50 | }, { 51 | "organization" : "org.scalaz", 52 | "name" : "scalaz-scalacheck-binding", 53 | "version" : "7.1.0" 54 | }, { 55 | "organization" : "org.scalaz", 56 | "name" : "scalaz-typelevel", 57 | "version" : "7.1.0" 58 | }, { 59 | "organization" : "org.scalaz", 60 | "name" : "scalaz-xml", 61 | "version" : "7.1.0" 62 | }, { 63 | "organization" : "org.specs2", 64 | "name" : "specs2-core", 65 | "version" : "2.4.2" 66 | } ], 67 | "scalas" : [ "2.10.4", "2.11.2" ], 68 | "licenses" : [ ], 69 | "sbt" : false 70 | } 
-------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/time/package.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | import java.text.SimpleDateFormat 17 | import java.util._ 18 | import Calendar._ 19 | 20 | /** 21 | * This package provides newtypes for times, as Tagged Long types 22 | */ 23 | package object time { 24 | 25 | // Unboxed newtypes, credit to @milessabin and @retronym 26 | type Tagged[U] = { type Tag = U } 27 | type @@[T, U] = T with Tagged[U] 28 | 29 | class Tagger[U] { def apply[T](t : T) : T @@ U = t.asInstanceOf[T @@ U] } 30 | def tag[U] = new Tagger[U] 31 | 32 | trait Day 33 | trait Epoch 34 | 35 | // java.lang.Long needs to be used here in order to be used in a case class 36 | // @see http://issues.scala-lang.org/browse/SI-5183 37 | type Epochtime = java.lang.Long @@ Epoch 38 | type Daytime = java.lang.Long @@ Day 39 | 40 | def daytime(i: java.lang.Long): Daytime = tag(i) 41 | def epochtime(i: java.lang.Long): Epochtime = tag(i) 42 | 43 | /** @return the number of elapsed millis since the beginning of day for that time */ 44 | def epochTimeToDaytime(t: Long): Daytime = { 45 | val calendar = Calendar.getInstance 46 | calendar.setTime(new Date(t)) 47 | 
daytime(((calendar.get(HOUR_OF_DAY)*60+calendar.get(MINUTE))*60+calendar.get(SECOND))*1000 + calendar.get(MILLISECOND)) 48 | } 49 | 50 | 51 | } 52 | 53 | -------------------------------------------------------------------------------- /bin/ci-release-oss: -------------------------------------------------------------------------------- 1 | #/bin/sh 2 | 3 | JVM_OPTS="-Dfile.encoding=UTF8 -XX:MaxPermSize=512m -Xms512m -Xmx2g -XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC"; export JVM_OPTS 4 | 5 | cat > project/ambiata-oss-plugins.sbt < 29 | override def self: Component with EventStream[Seq[File]] = this.asInstanceOf[Component with EventStream[Seq[File]]] 30 | 31 | val fileChooser = new FileChooser(new java.io.File(start)) 32 | fileChooser.peer.setMultiSelectionEnabled(true) 33 | 34 | /** 35 | * @return the selected files 36 | */ 37 | def selectedFiles: Seq[File] = Option(fileChooser.selectedFiles).toSeq.flatten 38 | 39 | action = new Action(label) { 40 | icon = getIcon("folder-icon.png") 41 | mnemonic = KeyEvent.VK_O 42 | accelerator = Some(KeyStroke.getKeyStroke(KeyEvent.VK_O, ActionEvent.ALT_MASK)) 43 | 44 | def apply() { 45 | fileChooser.showOpenDialog(outer) match { 46 | case Result.Approve => source.fire(selectedFiles) 47 | case _ => () 48 | } 49 | } 50 | } 51 | } 52 | 53 | 54 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/impl/reflect/ClassesSpec.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package reflect 19 | 20 | import testing.mutable.UnitSpecification 21 | import java.util.jar.JarEntry 22 | import Classes._ 23 | import application.ScoobiApp 24 | 25 | class ClassesSpec extends UnitSpecification { 26 | "the main jar contains all the dependent jars classes if it contains the DList scoobi class" >> { 27 | val classesWithDependencies = new Classes { 28 | override def mainJarEntries = Seq(new JarEntry("com/nicta/scoobi/core/DList.class")) 29 | } 30 | val classesWithoutDependencies = new Classes { 31 | override def mainJarEntries = Seq(new JarEntry("java/util/ArrayList.class")) 32 | } 33 | 34 | classesWithDependencies.mainJarContainsDependencies must beTrue 35 | classesWithoutDependencies.mainJarContainsDependencies must beFalse 36 | }; p 37 | 38 | "It is possible to find the directory containing a given class" >> { 39 | "classFile returns the file resource for a given class" >> { 40 | classFile(classOf[ScoobiApp].getName) === "com/nicta/scoobi/application/ScoobiApp.class" 41 | } 42 | "getResource returns the URL for a given class" >> { 43 | getResource(classOf[ScoobiApp]).map(_.toString) must beSome.like { case s => s must endWith("com/nicta/scoobi/application/ScoobiApp.class") } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/gui/StatusPanel.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * 
Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package gui 17 | 18 | import java.awt.{Font, Color} 19 | import java.io._ 20 | import swing._ 21 | import javax.swing.SwingUtilities 22 | 23 | class StatusPanel extends TextArea { outer => 24 | foreground = Color.blue 25 | font = new Font("Courier", Font.PLAIN, 11); 26 | rows = 5 27 | 28 | redirectSystemStreams() 29 | 30 | private def updateTextArea(text: String) { 31 | SwingUtilities.invokeLater(new Runnable() { 32 | def run() { 33 | outer.append(text) 34 | }}) 35 | } 36 | 37 | private def redirectSystemStreams() { 38 | val systemOut = System.out 39 | val out = new OutputStream { 40 | override def write(b: Int) { 41 | updateTextArea(String.valueOf(b)) 42 | systemOut.println(String.valueOf(b)) 43 | } 44 | 45 | override def write(b: Array[Byte], off: Int, len: Int) { 46 | updateTextArea(new String(b, off, len)) 47 | systemOut.println(new String(b, off, len)) 48 | } 49 | 50 | override def write(b: Array[Byte]) { 51 | write(b, 0, b.length) 52 | } 53 | } 54 | def printOut = new PrintStream(out, true) 55 | System.setOut(printOut) 56 | System.setErr(printOut) 57 | scala.Console.setOut(printOut) 58 | scala.Console.setErr(printOut) 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /examples/scoobding/src/main/scala/application/app/ReportPanel.scala: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package app 17 | 18 | import report._ 19 | import gui._ 20 | import reactive.{Var, Signal} 21 | import swing.{BoxPanel, Orientation} 22 | 23 | /** 24 | * display the queries selection panel + the results + the total number of records in the store 25 | */ 26 | case class ReportPanel(report: Signal[Report]) { outer => 27 | 28 | var mainPanel = PositionedBorderPanel(center = new BarsPanel(report.map(_.name), 29 | report.map(_.measured), 30 | report.map(_.unit), 31 | logarithmic = true, 32 | horizontal = false, 33 | report.map(_.results)), 34 | south = TotalRecords(report)) 35 | 36 | } 37 | 38 | 39 | case class TotalRecords(report: Signal[Report] = Var(Report())) extends BoxPanel(Orientation.Horizontal) { 40 | 41 | contents += new LabeledFieldPanel("Start time", report.map(_.startTime)) 42 | contents += new LabeledFieldPanel("End time", report.map(_.endTime)) 43 | contents += new LabeledFieldPanel("Total records", report.map(_.recordsNumber.toString)) 44 | 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/main/bin/scoobi: -------------------------------------------------------------------------------- 1 | #!/bin/bash --posix 2 | # 3 | # This script is copied from the original 
scala script to be able to use 4 | # the exit function in order to make sure that the terminal is working properly 5 | # on exit 6 | # 7 | ############################################################################## 8 | # Copyright 2002-2011, LAMP/EPFL 9 | # 10 | # This is free software; see the distribution for copying conditions. 11 | # There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A 12 | # PARTICULAR PURPOSE. 13 | ############################################################################## 14 | 15 | # Not sure what the right default is here 16 | # trying nonzero. 17 | scala_exit_status=127 18 | saved_stty="" 19 | 20 | # restore stty settings (echo in particular) 21 | function restoreSttySettings() { 22 | if [[ -n $SCALA_RUNNER_DEBUG ]]; then 23 | echo "restoring stty 24 | $saved_stty" 25 | fi 26 | 27 | stty $saved_stty 28 | saved_stty="" 29 | } 30 | 31 | function onExit() { 32 | if [[ "$saved_stty" != "" ]]; then 33 | restoreSttySettings 34 | fi 35 | exit $scala_exit_status 36 | } 37 | 38 | # to reenable echo if we are interrupted before completing. 39 | trap onExit INT 40 | 41 | # save terminal settings 42 | saved_stty=$(stty -g 2>/dev/null) 43 | # clear on error so we don't later try to restore them 44 | if [[ ! $? ]]; then 45 | saved_stty="" 46 | fi 47 | if [[ -n $SCALA_RUNNER_DEBUG ]]; then 48 | echo "saved stty 49 | $saved_stty" 50 | fi 51 | 52 | # command-line args 53 | user_classpath="" 54 | 55 | while : 56 | do 57 | case $1 in 58 | -cp) 59 | user_classpath=$2 60 | shift 61 | shift 62 | ;; 63 | *) 64 | break 65 | ;; 66 | esac 67 | done 68 | 69 | # basedir 70 | base=$(readlink -f ${BASH_SOURCE[0]} | xargs dirname) 71 | 72 | # classpath 73 | classpath=$base/scoobi-repl.jar:`hadoop classpath` 74 | if [ $user_classpath ]; then 75 | classpath=$user_classpath:$classpath 76 | fi 77 | 78 | # launch 79 | : ${HADOOP_CONF_DIR:=/home/hadoop/conf} 80 | java -cp $classpath com.nicta.scoobi.application.ScoobiRepl 81 | 82 | scala_exit_status=$? 
83 | onExit 84 | -------------------------------------------------------------------------------- /src/main/ls/0.8.2-cdh4.json: -------------------------------------------------------------------------------- 1 | { 2 | "organization" : "com.nicta", 3 | "name" : "scoobi", 4 | "version" : "0.8.2-cdh4", 5 | "description" : "scoobi", 6 | "site" : "", 7 | "tags" : [ ], 8 | "docs" : "", 9 | "resolvers" : [ "https://oss.sonatype.org/content/repositories/releases" ], 10 | "dependencies" : [ { 11 | "organization" : "org.apache.avro", 12 | "name" : "avro", 13 | "version" : "1.7.4" 14 | }, { 15 | "organization" : "com.thoughtworks.xstream", 16 | "name" : "xstream", 17 | "version" : "1.4.4" 18 | }, { 19 | "organization" : "javassist", 20 | "name" : "javassist", 21 | "version" : "3.12.1.GA" 22 | }, { 23 | "organization" : "com.googlecode.kiama", 24 | "name" : "kiama", 25 | "version" : "1.5.2" 26 | }, { 27 | "organization" : "com.chuusai", 28 | "name" : "shapeless_2.10.2", 29 | "version" : "2.0.0-M1" 30 | }, { 31 | "organization" : "org.apache.hadoop", 32 | "name" : "hadoop-client", 33 | "version" : "2.0.0-mr1-cdh4.0.1" 34 | }, { 35 | "organization" : "org.apache.hadoop", 36 | "name" : "hadoop-core", 37 | "version" : "2.0.0-mr1-cdh4.0.1" 38 | }, { 39 | "organization" : "org.apache.avro", 40 | "name" : "avro-mapred", 41 | "version" : "1.7.4" 42 | }, { 43 | "organization" : "org.scalaz", 44 | "name" : "scalaz-core", 45 | "version" : "7.0.6" 46 | }, { 47 | "organization" : "org.scalaz", 48 | "name" : "scalaz-iteratee", 49 | "version" : "7.0.6" 50 | }, { 51 | "organization" : "org.scalaz", 52 | "name" : "scalaz-concurrent", 53 | "version" : "7.0.6" 54 | }, { 55 | "organization" : "org.scalaz", 56 | "name" : "scalaz-scalacheck-binding", 57 | "version" : "7.0.6" 58 | }, { 59 | "organization" : "org.scalaz", 60 | "name" : "scalaz-typelevel", 61 | "version" : "7.0.6" 62 | }, { 63 | "organization" : "org.scalaz", 64 | "name" : "scalaz-xml", 65 | "version" : "7.0.6" 66 | }, { 67 | 
"organization" : "org.specs2", 68 | "name" : "specs2-core", 69 | "version" : "2.3.10" 70 | } ], 71 | "scalas" : [ "2.10.3" ], 72 | "licenses" : [ ], 73 | "sbt" : false 74 | } -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/core/InputOutputConverter.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package core 18 | 19 | import org.apache.hadoop.conf.Configuration 20 | import org.apache.hadoop.mapreduce.MapContext 21 | 22 | /** 23 | * Convert an InputFormat's key-value types to the type produced by a source 24 | */ 25 | trait InputConverter[K, V, A] extends FromKeyValueConverter { 26 | type InputContext = MapContext[K, V, _, _] 27 | def asValue(context: InputOutputContext, key: Any, value: Any): Any = fromKeyValue(context.context.asInstanceOf[InputContext], key.asInstanceOf[K], value.asInstanceOf[V]) 28 | def fromKeyValue(context: InputContext, key: K, value: V): A 29 | } 30 | 31 | /** Convert the type consumed by a DataSink into an OutputFormat's key-value types. 
*/ 32 | trait OutputConverter[K, V, B] extends ToKeyValueConverter { 33 | protected[scoobi] 34 | def asKeyValue(x: Any)(implicit configuration: Configuration) = toKeyValue(x.asInstanceOf[B]).asInstanceOf[(Any, Any)] 35 | def toKeyValue(x: B)(implicit configuration: Configuration): (K, V) 36 | } 37 | 38 | /** 39 | * Internal untyped output converter from value to (key,value) 40 | */ 41 | private[scoobi] 42 | trait ToKeyValueConverter { 43 | protected[scoobi] 44 | def asKeyValue(x: Any)(implicit configuration: Configuration): (Any, Any) 45 | } 46 | 47 | /** fusion of both trait when bi-directional conversion is possible */ 48 | trait InputOutputConverter[K, V, B] extends OutputConverter[K, V, B] with InputConverter[K, V, B] -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/io/text/TextSource.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta 17 | package scoobi 18 | package io 19 | package text 20 | 21 | import core._ 22 | import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat, FileInputFormat} 23 | import org.apache.hadoop.io.{Text, LongWritable} 24 | import org.apache.hadoop.fs.Path 25 | import org.apache.hadoop.mapreduce.Job 26 | import impl.io.Files 27 | 28 | 29 | /** Class that abstracts all the common functionality of reading from text files. */ 30 | case class TextSource[A : WireFormat](paths: Seq[String], 31 | inputFormat: Class[_ <: FileInputFormat[LongWritable, Text]] = classOf[TextInputFormat], 32 | inputConverter: InputConverter[LongWritable, Text, A] = TextInput.defaultTextConverter, 33 | check: Source.InputCheck = Source.defaultInputCheck) 34 | extends DataSource[LongWritable, Text, A] { 35 | 36 | private val inputPaths = paths.map(p => new Path(p)) 37 | override def toString = "TextSource("+id+")"+inputPaths.mkString("\n", "\n", "\n") 38 | 39 | def inputCheck(implicit sc: ScoobiConfiguration) { check(inputPaths, sc) } 40 | 41 | def inputConfigure(job: Job)(implicit sc: ScoobiConfiguration) { 42 | inputPaths foreach { p => FileInputFormat.addInputPath(job, p) } 43 | } 44 | 45 | def inputSize(implicit sc: ScoobiConfiguration): Long = 46 | inputPaths.map(p => Files.pathSize(p)(sc.configuration)).sum 47 | } 48 | 49 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/Scoobi.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | 18 | import application._ 19 | import core.Reductions 20 | import impl.control.ImplicitParameters 21 | import impl.ScoobiConfigurations 22 | import lib._ 23 | 24 | /** Global Scoobi functions and values. */ 25 | object Scoobi extends core.WireFormatImplicits 26 | with core.GroupingImplicits 27 | with Application 28 | with InputsOutputs 29 | with Persist 30 | with Library 31 | with DObjects 32 | with Reductions 33 | with ScoobiConfigurations 34 | with ImplicitParameters { 35 | 36 | /* Primary types */ 37 | type WireFormat[A] = com.nicta.scoobi.core.WireFormat[A] 38 | val DList = DLists 39 | type DList[A] = com.nicta.scoobi.core.DList[A] 40 | implicit def traversableToDList[A : WireFormat](trav: Traversable[A]) = DLists.TraversableToDList(trav) 41 | 42 | val DObject = DObjects 43 | type DObject[A] = com.nicta.scoobi.core.DObject[A] 44 | 45 | type DoFn[A, B] = com.nicta.scoobi.core.DoFn[A, B] 46 | 47 | type EnvDoFn[A, B, E] = com.nicta.scoobi.core.EnvDoFn[A, B, E] 48 | 49 | val Grouping = com.nicta.scoobi.core.Grouping 50 | type Grouping[A] = com.nicta.scoobi.core.Grouping[A] 51 | 52 | type Emitter[A] = com.nicta.scoobi.core.Emitter[A] 53 | 54 | val ScoobiConfiguration = impl.ScoobiConfiguration 55 | 56 | type Counters = com.nicta.scoobi.core.Counters 57 | type Heartbeat = com.nicta.scoobi.core.Heartbeat 58 | 59 | type ExpiryPolicy = com.nicta.scoobi.core.ExpiryPolicy 60 | 61 | } 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- 
/**
 * A named measurement, for example an execution time:
 *
 *   Quantity("execution time", 1234, Millis)
 *
 * `baseValue` must always hold the measurement expressed in the smallest measure unit;
 * the `unit` field is then used to do the conversion.
 */
case class Quantity(name: String, baseValue: Long, unit: MeasureUnit = EmptyUnit) {
  /** @return the base value converted by the quantity unit */
  def value = unit.fromBase(baseValue)

  /** @return `v` followed by a space and the unit, for display */
  def show(v: Long) = v.toString + " " + unit

  /** @return the same name and base value with another unit */
  def withUnit(u: MeasureUnit) = Quantity(name, baseValue, u)

  /** @return a quantity whose base value is raised by `i` (only used for testing) */
  def increment(i: Long) = Quantity(name, baseValue + i, unit)

  /** @return a quantity whose base value is divided (integer division) by `i` */
  def divideBy(i: Int) = Quantity(name, baseValue / i, unit)
}

object Quantity {

  /** quantities are ordered by their converted value */
  lazy val quantityOrder: Order[Quantity] = order((q: Quantity) => q.value)

  /** quantities are added by summing base values; name and unit are taken from the left operand */
  lazy val quantityIsSemigroup: Semigroup[Quantity] = new Semigroup[Quantity] {
    def append(left: Quantity, right: => Quantity) =
      left.copy(baseValue = left.baseValue + right.baseValue)
  }
}
/**
 * Utility methods to work on strings.
 */
object Text {

  /**
   * This method provides a default case for a conversion to a number.
   */
  implicit def toNumber(s: String): ToNumber = new ToNumber(s)

  class ToNumber(s: String) {
    /** @return `s` parsed as an Int, or 0 when `s` is not a parsable integer */
    def toIntOrZero: Int =
      try s.toInt
      // only the parse failure is translated to 0; fatal JVM errors are no longer swallowed
      catch { case _: NumberFormatException => 0 }
  }

  /** @return an extended String */
  implicit def extendedString(s: String): ExtendedString = new ExtendedString(s)

  class ExtendedString(s: String) {

    /**
     * @return true if s matches an 'include / exclude' regexp:
     *
     *  - "include/exclude": s must match include and must not match exclude
     *  - "/exclude": s must not match exclude
     *  - "include/" or "include": s must match include
     *
     * A match means that the expression is found anywhere in s. If the evaluation throws an
     * exception (an invalid regular expression for example) the filter is considered
     * satisfied and true is returned.
     *
     * @see measure.TextSpec
     */
    def matchesOnly(only: String): Boolean =
      try {
        def matches(exp: String) = s matches ".*" + exp + ".*"

        val includeExclude = only.trim.split("/").filter(_.nonEmpty)
        if (includeExclude.size == 2) matches(includeExclude(0).trim) && !matches(includeExclude(1).trim)
        else if (only.trim startsWith "/") !matches(only.trim.drop(1))
        else if (only.trim endsWith "/") matches(only.trim.dropRight(1))
        else matches(only.trim)
      }
      // deliberate best effort: a broken expression must not filter anything out,
      // but errors such as OutOfMemoryError are allowed to propagate
      catch { case _: Exception => true }

  }

}
"name" : "javassist", 21 | "version" : "3.12.1.GA" 22 | }, { 23 | "organization" : "com.googlecode.kiama", 24 | "name" : "kiama", 25 | "version" : "1.5.0" 26 | }, { 27 | "organization" : "com.github.mdr", 28 | "name" : "ascii-graphs", 29 | "version" : "0.0.3" 30 | }, { 31 | "organization" : "com.chuusai", 32 | "name" : "shapeless", 33 | "version" : "1.2.4" 34 | }, { 35 | "organization" : "org.apache.hadoop", 36 | "name" : "hadoop-core", 37 | "version" : "0.20.2-cdh3u1" 38 | }, { 39 | "organization" : "org.apache.avro", 40 | "name" : "avro-mapred", 41 | "version" : "1.7.4" 42 | }, { 43 | "organization" : "org.scalaz", 44 | "name" : "scalaz-core", 45 | "version" : "7.0.0" 46 | }, { 47 | "organization" : "org.scalaz", 48 | "name" : "scalaz-concurrent", 49 | "version" : "7.0.0" 50 | }, { 51 | "organization" : "org.scalaz", 52 | "name" : "scalaz-scalacheck-binding", 53 | "version" : "7.0.0" 54 | }, { 55 | "organization" : "org.scalaz", 56 | "name" : "scalaz-typelevel", 57 | "version" : "7.0.0" 58 | }, { 59 | "organization" : "org.scalaz", 60 | "name" : "scalaz-xml", 61 | "version" : "7.0.0" 62 | }, { 63 | "organization" : "org.specs2", 64 | "name" : "specs2", 65 | "version" : "2.0" 66 | }, { 67 | "organization" : "org.scalacheck", 68 | "name" : "scalacheck", 69 | "version" : "1.10.0" 70 | }, { 71 | "organization" : "org.mockito", 72 | "name" : "mockito-all", 73 | "version" : "1.9.0" 74 | } ], 75 | "scalas" : [ "2.10.2" ], 76 | "licenses" : [ ], 77 | "sbt" : false 78 | } -------------------------------------------------------------------------------- /src/main/ls/0.8.0-cdh3.json: -------------------------------------------------------------------------------- 1 | { 2 | "organization" : "com.nicta", 3 | "name" : "scoobi", 4 | "version" : "0.8.0-cdh3", 5 | "description" : "scoobi", 6 | "site" : "", 7 | "tags" : [ ], 8 | "docs" : "", 9 | "resolvers" : [ "https://oss.sonatype.org/content/repositories/releases" ], 10 | "dependencies" : [ { 11 | "organization" : 
"org.apache.avro", 12 | "name" : "avro", 13 | "version" : "1.7.4" 14 | }, { 15 | "organization" : "com.thoughtworks.xstream", 16 | "name" : "xstream", 17 | "version" : "1.4.4" 18 | }, { 19 | "organization" : "javassist", 20 | "name" : "javassist", 21 | "version" : "3.12.1.GA" 22 | }, { 23 | "organization" : "com.googlecode.kiama", 24 | "name" : "kiama", 25 | "version" : "1.5.1" 26 | }, { 27 | "organization" : "com.github.mdr", 28 | "name" : "ascii-graphs", 29 | "version" : "0.0.3" 30 | }, { 31 | "organization" : "com.chuusai", 32 | "name" : "shapeless", 33 | "version" : "1.2.4" 34 | }, { 35 | "organization" : "org.apache.hadoop", 36 | "name" : "hadoop-core", 37 | "version" : "0.20.2-cdh3u1" 38 | }, { 39 | "organization" : "org.apache.avro", 40 | "name" : "avro-mapred", 41 | "version" : "1.7.4" 42 | }, { 43 | "organization" : "org.scalaz", 44 | "name" : "scalaz-core", 45 | "version" : "7.0.2" 46 | }, { 47 | "organization" : "org.scalaz", 48 | "name" : "scalaz-concurrent", 49 | "version" : "7.0.2" 50 | }, { 51 | "organization" : "org.scalaz", 52 | "name" : "scalaz-scalacheck-binding", 53 | "version" : "7.0.2" 54 | }, { 55 | "organization" : "org.scalaz", 56 | "name" : "scalaz-typelevel", 57 | "version" : "7.0.2" 58 | }, { 59 | "organization" : "org.scalaz", 60 | "name" : "scalaz-xml", 61 | "version" : "7.0.2" 62 | }, { 63 | "organization" : "org.specs2", 64 | "name" : "specs2", 65 | "version" : "2.1.1" 66 | }, { 67 | "organization" : "org.scalacheck", 68 | "name" : "scalacheck", 69 | "version" : "1.10.0" 70 | }, { 71 | "organization" : "org.mockito", 72 | "name" : "mockito-all", 73 | "version" : "1.9.0" 74 | } ], 75 | "scalas" : [ "2.10.2" ], 76 | "licenses" : [ ], 77 | "sbt" : false 78 | } -------------------------------------------------------------------------------- /examples/scoobding/scoobding.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 
/** The super-class of all "value" types used in Hadoop jobs. */
abstract class ScoobiWritable[A](private var x: A) extends Writable with Configured { self =>
  /** no-argument constructor: the wrapped value starts as `null.asInstanceOf[A]` */
  def this() = this(null.asInstanceOf[A])

  /** @return the wrapped value */
  def get: A = x

  /** replace the wrapped value */
  def set(x: A): Unit = {
    // `self` is needed because the parameter shadows the field of the same name
    self.x = x
  }
}


/** Constructs a ScoobiWritable, with some metadata (a WireFormat) retrieved from the distributed cache */
object ScoobiWritable {
  /** build the runtime class named `name` from a MetadataScoobiWritable carrying `wf` as its metadata */
  def apply(name: String, wf: WireReaderWriter)(implicit sc: ScoobiConfiguration): RuntimeClass = {
    val builder =
      MetadataClassBuilder[MetadataScoobiWritable](name, wf, sc.scoobiClassLoader, sc.configuration)(
        implicitly[Manifest[MetadataScoobiWritable]])
    builder.toRuntimeClass
  }

  /** same as above with the WireReaderWriter passed implicitly; `witness` is not used by the body */
  def apply[A](name: String, witness: A)(implicit sc: ScoobiConfiguration, wf: WireReaderWriter): RuntimeClass =
    apply(name, wf)(sc)
}

/** A ScoobiWritable which reads and writes its value with a wire format looked up in the Scoobi metadata */
abstract class MetadataScoobiWritable extends ScoobiWritable[Any] {

  /** key used to look up the wire format in the metadata */
  def metadataTag: String

  // resolved on first use, from the metadata found with this object's configuration
  lazy val wireFormat =
    ScoobiMetadata.metadata(configuration)(metadataTag).asInstanceOf[WireReaderWriter]

  def write(out: DataOutput): Unit = {
    wireFormat.write(get, out)
  }

  def readFields(in: DataInput): Unit = {
    set(wireFormat.read(in))
  }

  override def toString = get.toString
}
/**
 * This object prints out the originating jar for the most important libraries used in Scoobi
 * so that it's easier to debug classpath errors.
 *
 * To use this object you need to have an implicit logger object in scope and call the
 * logInfo or the logDebug method.
 */
object ClasspathDiagnostics {

  /** log the library locations at the info level */
  def logInfo(implicit logger: Log): Unit = logFiles(logger.info _)

  /** log the library locations at the debug level */
  def logDebug(implicit logger: Log): Unit = logFiles(logger.debug _)

  /** log, for each library, the location of one class which is representative of it */
  private def logFiles(logFunction: String => Unit): Unit = {
    val libraries = Seq(
      ("Java",   classOf[java.lang.String]),
      ("Scala",  classOf[scala.Range]),
      ("Hadoop", classOf[Writable]),
      ("Avro",   classOf[Schema]),
      ("Kiama",  classOf[Rewriter]),
      ("Scoobi", classOf[ScoobiConfiguration])
    )
    libraries.foreach { library =>
      val (lib, c) = library
      logDebugClass(lib, c.getName)(logFunction)
    }
  }

  /**
   * log the URL of the resource corresponding to `className`, as seen by the class loader of this object.
   * An exception during the lookup or the logging is printed and not rethrown.
   */
  private def logDebugClass(libName: String, className: String)(logFunction: String => Unit): Unit = {
    try {
      val url = getClass.getClassLoader.getResource(filePath(className))
      logFunction(s"the URL of $libName (evidenced with the $className class) is " + url)
    }
    catch { case e: Exception => e.printStackTrace }
  }

}
/**
 * This abstract class can be used to create Hadoop specifications on the NictaCluster
 */
abstract class NictaHadoop extends HadoopSpecification with Tags with NictaCluster {

  /** this type alias makes it shorter to pass a new configuration object to each example */
  type SC = ScoobiConfiguration

  /** the section marker which is named "hadoop" */
  def acceptanceSection = section("hadoop")

  /** the mapped fragments get the "hadoop" section marker both inserted and added */
  override def map(fs: =>Fragments) = {
    val mapped = super.map(fs)
    mapped.insert(acceptanceSection).add(acceptanceSection)
  }

  /**
   * add more memory when running on the cluster
   */
  override def setConfiguration(configuration: Configuration) = {
    super.setConfiguration(configuration)
    // the option is set after the parent settings have been applied
    configuration.set("mapred.child.java.opts", "-Xmx512m")
    configuration
  }
}

/**
 * Addresses for the filesystem and jobtracker for the Nicta cluster.
 * They override the search for those values in the local configuration files
 */
trait NictaCluster extends Cluster {
  override def fs         = "hdfs://svm-hadoop1.ssrg.nicta.com.au"
  override def jobTracker = "svm-hadoop1.ssrg.nicta.com.au:8021"
}

/**
 * An abstract class for simple jobs running on the NICTA cluster
 */
abstract class NictaSimpleJobs extends NictaHadoop with SimpleJobs
/**
 * An EventStream that is implemented by delegating everything to an EventSource.
 *
 * Each member below forwards its arguments unchanged to the same member of `source`.
 */
trait EventStreamSourceProxy[T] extends EventStream[T] with Observing {
  /** the EventSource receiving all the calls; it is created on first access */
  lazy val source: EventSource[T] = new EventSource[T]

  def flatMap[U](f: T=>EventStream[U]): EventStream[U] = source.flatMap[U](f)
  // the implicit Observing received by this method is the one passed to the source
  def foreach(f: T=>Unit)(implicit observing: Observing): Unit = source.foreach(f)(observing)
  def |[U>:T](that: EventStream[U]): EventStream[U] = source.|(that)
  def map[U](f: T=>U): EventStream[U] = source.map(f)
  def filter(f: T=>Boolean): EventStream[T] = source.filter(f)
  def collect[U](pf: PartialFunction[T,U]): EventStream[U] = source.collect(pf)
  def takeWhile(p: T=>Boolean): EventStream[T] = source.takeWhile(p)
  def foldLeft[U](initial: U)(f: (U,T)=>U): EventStream[U] = source.foldLeft(initial)(f)
  def hold[U>:T](init: U): Signal[U] = source.hold(init)
  def nonrecursive: EventStream[T] = source.nonrecursive
  // the result types of the next members are inferred from the corresponding source members
  def debugString = source.debugString
  def debugName = source.debugName
  def zipWithStaleness = source.zipWithStaleness
  def nonblocking = source.nonblocking
  def distinct = source.distinct
  def throttle(period: Long): EventStream[T] = source.throttle(period)

  // listeners are registered on, and removed from, the source itself
  private[reactive] def addListener(f: (T) => Unit): Unit = source.addListener(f)
  private[reactive] def removeListener(f: (T) => Unit): Unit = source.removeListener(f)
}
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package impl 18 | package control 19 | 20 | /** 21 | * This trait can be used to overcome some limitations with method overloading due to type erasure 22 | */ 23 | trait ImplicitParameters { 24 | implicit lazy val implicitParameter: ImplicitParameter = new ImplicitParameter {} 25 | implicit lazy val implicitParameter1: ImplicitParameter1 = new ImplicitParameter1 {} 26 | implicit lazy val implicitParameter2: ImplicitParameter2 = new ImplicitParameter2 {} 27 | implicit lazy val implicitParameter3: ImplicitParameter3 = new ImplicitParameter3 {} 28 | implicit lazy val implicitParameter4: ImplicitParameter4 = new ImplicitParameter4 {} 29 | implicit lazy val implicitParameter5: ImplicitParameter5 = new ImplicitParameter5 {} 30 | implicit lazy val implicitParameter6: ImplicitParameter6 = new ImplicitParameter6 {} 31 | implicit lazy val implicitParameter7: ImplicitParameter7 = new ImplicitParameter7 {} 32 | implicit lazy val implicitParameter8: ImplicitParameter8 = new ImplicitParameter8 {} 33 | implicit lazy val implicitParameter9: ImplicitParameter9 = new ImplicitParameter9 {} 34 | implicit lazy val implicitParameter10: ImplicitParameter10 = new ImplicitParameter10 {} 35 | } 36 | trait ImplicitParameter 37 | trait ImplicitParameter1 38 | trait ImplicitParameter2 39 | trait ImplicitParameter3 40 | trait ImplicitParameter4 41 | trait ImplicitParameter5 42 | trait ImplicitParameter6 43 | trait ImplicitParameter7 44 | trait ImplicitParameter8 45 | trait ImplicitParameter9 46 | trait 
ImplicitParameter10 47 | 48 | object ImplicitParameters extends ImplicitParameters 49 | -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/guide/ScoobiDevelopment.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.nicta.scoobi 17 | package guide 18 | 19 | class ScoobiDevelopment extends ScoobiPage { def is = "Scoobi Development".title^ 20 | """ 21 | ### Building 22 | 23 | Building scoobi should be as easy as `sbt publish-local` to build and install scoobi locally. An easy way to insure your applications are picking up your local version of scoobi, is by deleting the version that was downloaded from sonatype. It is stored at ~/.ivy2/cache/com.nicta.scoobi*: 24 | 25 | ### Run the tests 26 | 27 | `sbt test` will run the unit tests, as well as the tests in local mode. It's also possible to run the tests on a cluster too, with some fancy options (See testing guide) 28 | 29 | ### User Guide / Docs 30 | 31 | Are located at `src/test/scala/com/nicta/scoobi/guide`. 
They can be built with the command: 32 | 33 | ``` 34 | $ sbt 35 | > test-only *UserGuide* -- html 36 | ``` 37 | and are built into `target/spec2-reports/guide-SNAPSHOT/guide` 38 | 39 | ### Contributions 40 | 41 | We welcome pull-requests on github. We rebase commits aggressively (up until the point they land on master) to have a clean and linear history. So don't be surprised if the sha1 changes when it lands. As such, it makes our lives a lot easier if your commit is already based on master and all squashed down into nice logical commits. 42 | 43 | Once landing on master, our build server will then run the full tests on our internal cluster -- and if everything looks good, a new snapshot will be published. 44 | 45 | """ 46 | } 47 | -------------------------------------------------------------------------------- /src/main/ls/0.7.0-cdh4.json: -------------------------------------------------------------------------------- 1 | { 2 | "organization" : "com.nicta", 3 | "name" : "scoobi", 4 | "version" : "0.7.0-cdh4-SNAPSHOT", 5 | "description" : "scoobi", 6 | "site" : "", 7 | "tags" : [ ], 8 | "docs" : "", 9 | "resolvers" : [ "https://oss.sonatype.org/content/repositories/releases" ], 10 | "dependencies" : [ { 11 | "organization" : "org.apache.avro", 12 | "name" : "avro", 13 | "version" : "1.7.4" 14 | }, { 15 | "organization" : "com.thoughtworks.xstream", 16 | "name" : "xstream", 17 | "version" : "1.4.4" 18 | }, { 19 | "organization" : "javassist", 20 | "name" : "javassist", 21 | "version" : "3.12.1.GA" 22 | }, { 23 | "organization" : "com.googlecode.kiama", 24 | "name" : "kiama", 25 | "version" : "1.5.0" 26 | }, { 27 | "organization" : "com.github.mdr", 28 | "name" : "ascii-graphs", 29 | "version" : "0.0.3" 30 | }, { 31 | "organization" : "com.chuusai", 32 | "name" : "shapeless", 33 | "version" : "1.2.4" 34 | }, { 35 | "organization" : "org.apache.hadoop", 36 | "name" : "hadoop-client", 37 | "version" : "2.0.0-mr1-cdh4.0.1" 38 | }, { 39 | "organization" : 
"org.apache.hadoop", 40 | "name" : "hadoop-core", 41 | "version" : "2.0.0-mr1-cdh4.0.1" 42 | }, { 43 | "organization" : "org.apache.avro", 44 | "name" : "avro-mapred", 45 | "version" : "1.7.4" 46 | }, { 47 | "organization" : "org.scalaz", 48 | "name" : "scalaz-core", 49 | "version" : "7.0.0" 50 | }, { 51 | "organization" : "org.scalaz", 52 | "name" : "scalaz-concurrent", 53 | "version" : "7.0.0" 54 | }, { 55 | "organization" : "org.scalaz", 56 | "name" : "scalaz-scalacheck-binding", 57 | "version" : "7.0.0" 58 | }, { 59 | "organization" : "org.scalaz", 60 | "name" : "scalaz-typelevel", 61 | "version" : "7.0.0" 62 | }, { 63 | "organization" : "org.scalaz", 64 | "name" : "scalaz-xml", 65 | "version" : "7.0.0" 66 | }, { 67 | "organization" : "org.specs2", 68 | "name" : "specs2", 69 | "version" : "2.0" 70 | }, { 71 | "organization" : "org.scalacheck", 72 | "name" : "scalacheck", 73 | "version" : "1.10.0" 74 | }, { 75 | "organization" : "org.mockito", 76 | "name" : "mockito-all", 77 | "version" : "1.9.0" 78 | } ], 79 | "scalas" : [ "2.10.2" ], 80 | "licenses" : [ ], 81 | "sbt" : false 82 | } -------------------------------------------------------------------------------- /src/test/scala/com/nicta/scoobi/guide/Advanced.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.nicta.scoobi 17 | package guide 18 | 19 | class Advanced extends ScoobiPage { def is = "Advanced Notes".title^ 20 | """ 21 | ### Configuration Options 22 | 23 | Scoobi allows you to configure a few useful runtime settings: 24 | 25 | * Use a non-standard working directory by passing `-Dscoobi.workdir` to a `ScoobiApp` 26 | * Set an upper-bound on the amount of reducers scoobi will use with `ScoobiConfiguration.setMaxReducers` 27 | * Set a lower-bound on the amount of reducers scoobi use with `ScoobiConfiguration.setMinReducers` 28 | * Set the amount of reducers scoobi picks, with `ScoobiConfiguration.setBytesPerReducer` (Note: this is based on the input to a MapReduce job, not the input to the reducer. Default is 1GiB) 29 | * Disable the use of combiners with `ScoobiConfiguration.disableCombiners` 30 | 31 | Look in `ScoobiConfiguration` for other useful runtime configuration options 32 | 33 | ### Static References 34 | 35 | Values or objects that are behind a final static variable or reference won't get serialised properly. They get serialised in their initial state, not their current state. Very often this initial state might be null (for references) and 0 for Ints etc. So our recommendation is to not use them at all. And to avoid hitting the problem, don't use DelayedInit (it internally works with them) and always prefer a `val` to a `var` (especially considering variables are not shared between map-reduce jobs) 36 | 37 | 38 | ### DList Covariance 39 | 40 | At the moment DList is not covariant (that is, a `DList[Apple]` is not a `DList[Fruit]`). 
This is something we're slowly working on, but is quite a large issue because of its flow-on effects""" 41 | } 42 | -------------------------------------------------------------------------------- /src/main/scala/com/nicta/scoobi/application/ScoobiCommandLineArgs.scala: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2011,2012 National ICT Australia Limited 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
/**
 * This trait can be mixed in an Application trait in order to store command-line arguments
 * before any code is executed: the initialisation code of the class mixing it in is captured
 * by `delayedInit` and is only run when `set` is called.
 */
trait ScoobiCommandLineArgs extends DelayedInit {

  protected[scoobi] var commandLineArguments = Seq[String]()
  protected[scoobi] var scoobiArguments = Seq[String]()
  protected[scoobi] var userArguments = Seq[String]()

  /** the captured initialisation code, doing nothing until `delayedInit` is called */
  private var body: () => Unit = () => ()

  /**
   * set the command line arguments and the scoobi arguments, then trigger the body.
   * The user arguments are not set by this method.
   */
  private[scoobi] def set(args: Seq[String]): Unit = {
    commandLineArguments = args
    setScoobiArgs(args)
    body()
  }

  /**
   * scoobi arguments are the ones which are dot-separated after 'scoobi'
   *
   * hadoop jar job.jar file.txt scoobi verbose.all file2.txt
   *   => scoobiArguments = Seq(verbose, all)
   */
  private[scoobi] def setScoobiArgs(args: Seq[String]): Unit = {
    val fromMarker = args.dropWhile(_.toLowerCase != "scoobi")
    // only the single argument following the 'scoobi' marker is considered
    scoobiArguments = fromMarker.slice(1, 2).flatMap(_.toLowerCase.split("\\."))
  }

  /**
   * the users arguments are the ones which are before and after the scoobi arguments:
   *
   * hadoop jar job.jar file.txt scoobi verbose.all file2.txt
   *   => userArguments = Seq(file.txt, file2.txt)
   */
  private[scoobi] def setRemainingArgs(args: Seq[String]): Unit = {
    val (beforeMarker, fromMarker) = args.span(_.toLowerCase != "scoobi")
    // drop the 'scoobi' marker and the argument following it
    userArguments = beforeMarker ++ fromMarker.drop(2)
  }

  /** store the initialisation code instead of executing it */
  def delayedInit(x: =>Unit): Unit = {
    body = () => x
  }
}
/**
 * Property-based specification: a DList[String] must give equivalent results when it is
 * run with a configuration set for local execution and with one set for in-memory execution.
 *
 * NOTE(review): the generator for DList[String] is presumably provided by CompNodeData -- to be confirmed.
 */
class RandomDListsSpecification extends NictaSimpleJobs with CompNodeData {
  // the property must pass at least 20 times
  "A DList must return an equivalent result, whether it's executed in memory or locally" >> prop { (l1: DList[String]) =>
    compareExecutions(l1)
  }.set(minTestsOk = 20)

  /**
   * run the list with a local, then an in-memory configuration (a new ScoobiConfiguration each time)
   * and check that the local results contain exactly the in-memory results
   */
  def compareExecutions(l1: DList[String]) = {
    val locally  = l1.run(configureForLocal(ScoobiConfiguration()))
    val inMemory = l1.run(configureForInMemory(ScoobiConfiguration()))

    locally aka "the local hadoop results" must contain(exactly(inMemory:_*))

    // print a marker, then return a successful result
    "====== EXAMPLE OK ======\n".pp; ok
  }
}
/**
 * This class helps with the nice display of text.
 *
 * If a text is at least as long as lineSize, it is split on the separator (whitespace by
 * default) and the pieces are joined again with single spaces, starting a new line each
 * time the current line would reach lineSize. A piece which is itself longer than lineSize
 * is further split on commas in the same way.
 *
 * @param lineSize  maximum size of a line; when None the text is left untouched
 * @param separator regular expression used to split the text
 */
case class PrettyPrinter(lineSize: Option[Int] = None, separator: String = "\\s") {
  /** @return the text, wrapped according to lineSize */
  def print(text: String): String = printLines(text)._2

  /** @return the size of the last line and the wrapped text */
  private def printLines(text: String): (Int, String) =
    lineSize match {
      case Some(maxSize) if text.size >= maxSize =>
        text.split(separator).foldLeft((0, "")) { (res, cur) =>
          val (currentLineSize, result) = res
          // the first piece always starts the first line
          if (result.isEmpty) (cur.size, cur)
          // the piece fits on the current line (+1 for the joining space)
          else if (currentLineSize + cur.size < maxSize) (currentLineSize + cur.size + 1, result + " " + cur)
          // the piece is too long for any line: wrap it on commas, on a new line
          else if (cur.size > maxSize) {
            val (lastSize, printOnCommas) = PrettyPrinter(maxSize, "\\,").printLines(cur)
            (lastSize, result + "\n" + printOnCommas)
          }
          // otherwise the piece starts a new line
          else (cur.size, result + "\n" + cur)
        }
      // no maximum size, or a text which is short enough
      case _ => (text.size, text)
    }

  /**
   * @return a pretty printed line which can be displayed in a Swing tooltip with multilines
   */
  def asToolTip(text: String): String = {
    // NOTE(review): the markup in these literals was lost in the available copy of this file
    // (one literal was even cut by a raw line break). The html wrapper, the line break tag and
    // the non-breaking space entity are reconstructed from the purpose stated above -- confirm
    // the exact strings against the original source.
    "<html>" + print(text).replace("\n", "<br/>").replaceAll("\\s", "&nbsp;") + "</html>"
  }
}

object PrettyPrinter {
  /** a printer wrapping on whitespace at the given line size */
  def apply(lineSize: Int): PrettyPrinter = PrettyPrinter(Some(lineSize))
  /** a printer wrapping on the given separator at the given line size */
  def apply(lineSize: Int, separator: String): PrettyPrinter = PrettyPrinter(Some(lineSize), separator)
}