├── .gitignore ├── LICENSE ├── README.asciidoc ├── boot ├── boot-fsshell │ ├── .gitignore │ ├── README.md │ ├── build.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── pom.xml │ ├── settings.gradle │ └── src │ │ └── main │ │ ├── java │ │ └── demo │ │ │ └── DemoApplication.java │ │ └── resources │ │ ├── application.yml │ │ └── log4j.properties ├── yarn-boot-simple │ ├── .gitignore │ ├── LICENSE.code.txt │ ├── LICENSE.writing.txt │ ├── README.md │ ├── build.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── rm-ui.png │ ├── settings.gradle │ └── src │ │ └── main │ │ ├── java │ │ └── hello │ │ │ ├── appmaster │ │ │ └── AppmasterApplication.java │ │ │ ├── client │ │ │ └── ClientApplication.java │ │ │ └── container │ │ │ ├── ContainerApplication.java │ │ │ └── HelloPojo.java │ │ └── resources │ │ └── application.yml └── yarn-store-groups │ ├── README.md │ ├── appmaster │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── hello │ │ │ └── appmaster │ │ │ └── AppmasterApplication.java │ │ └── resources │ │ ├── application.yml │ │ └── log4j.properties │ ├── build.gradle │ ├── client │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── hello │ │ │ └── client │ │ │ └── ClientApplication.java │ │ └── resources │ │ ├── application.yml │ │ └── log4j.properties │ ├── container │ ├── pom.xml │ └── src │ │ └── main │ │ ├── java │ │ └── hello │ │ │ └── container │ │ │ └── ContainerApplication.java │ │ └── resources │ │ ├── application.yml │ │ └── log4j.properties │ ├── dist │ ├── assembly.xml │ └── pom.xml │ ├── gradle │ └── wrapper │ │ ├── gradle-wrapper.jar │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── pom.xml │ └── settings.gradle ├── dataset ├── .gitignore ├── pom.xml └── src │ └── main │ ├── java │ └── org │ │ └── springframework │ │ └── samples │ │ └── hadoop │ │ └── 
dataset │ │ ├── DatasetConfig.java │ │ ├── FileInfo.java │ │ └── MyFilesApp.java │ └── resources │ ├── application.properties │ ├── hadoop-context.xml │ └── logback.xml ├── hbase ├── README.asciidoc ├── pom.xml └── src │ └── main │ ├── config │ └── log4j.properties │ ├── java │ └── org │ │ └── springframework │ │ └── samples │ │ └── hadoop │ │ └── hbase │ │ ├── User.java │ │ ├── UserApp.java │ │ ├── UserRepository.java │ │ └── UserUtils.java │ └── resources │ ├── META-INF │ └── spring │ │ └── application-context.xml │ └── hbase.properties ├── hive-batch ├── .gitignore ├── README.asciidoc ├── data │ └── nbatweets-small.txt ├── pom.xml └── src │ └── main │ ├── config │ └── log4j.properties │ ├── java │ └── org │ │ └── springframework │ │ └── samples │ │ └── hadoop │ │ └── hive │ │ └── HiveBatchApp.java │ └── resources │ ├── META-INF │ └── spring │ │ ├── batch-common-context.xml │ │ └── hive-batch-context.xml │ ├── file-prep.groovy │ ├── hadoop.properties │ ├── hive.properties │ ├── results.groovy │ ├── set-hive-permissions.groovy │ └── tweet-influencers.hql ├── hive ├── .gitignore ├── README.asciidoc ├── data │ ├── apache.log │ └── passwd ├── pom.xml └── src │ └── main │ ├── config │ └── log4j.properties │ ├── java │ └── org │ │ └── springframework │ │ └── samples │ │ └── hadoop │ │ └── hive │ │ ├── HiveApp.java │ │ ├── HiveAppWithApacheLogs.java │ │ ├── HiveClientApp.java │ │ ├── HiveClientPasswordRepository.java │ │ ├── HiveTemplatePasswordRepository.java │ │ └── PasswordRepository.java │ └── resources │ ├── .gitignore │ ├── META-INF │ └── spring │ │ ├── hive-apache-log-context.xml │ │ └── hive-context.xml │ ├── apache-log-simple.hql │ ├── copy-files.groovy │ ├── hadoop.properties │ ├── hive.properties │ ├── password-analysis.hql │ └── set-hive-permissions.groovy ├── mapreduce ├── .classpath ├── .gitignore ├── .project ├── README.asciidoc ├── data │ └── nietzsche-chapter-1.txt ├── etc │ ├── cdh5 │ │ └── yarn-site.xml │ ├── hdp22 │ │ └── mapred-site.xml │ └── phd30 
│ │ └── mapred-site.xml ├── pom.xml └── src │ └── main │ ├── config │ └── log4j.properties │ ├── java │ └── org │ │ └── springframework │ │ └── samples │ │ └── hadoop │ │ └── mapreduce │ │ └── Wordcount.java │ └── resources │ ├── META-INF │ └── spring │ │ └── application-context.xml │ ├── copy-files.groovy │ └── hadoop.properties ├── mr-batch ├── .gitignore ├── README.asciidoc ├── data │ └── nbatweets-small.txt ├── pom.xml └── src │ └── main │ ├── config │ └── log4j.properties │ ├── java │ └── org │ │ └── springframework │ │ └── samples │ │ └── hadoop │ │ └── mapreduce │ │ ├── HashtagCount.java │ │ └── MrBatchApp.java │ └── resources │ ├── META-INF │ └── spring │ │ ├── batch-common-context.xml │ │ └── mr-batch-context.xml │ ├── file-prep.groovy │ ├── hadoop.properties │ └── results.groovy ├── parent └── pom.xml ├── pig ├── .gitignore ├── README.asciidoc ├── data │ └── apache.log ├── lib │ └── piggybank-0.9.2.jar ├── pom.xml └── src │ └── main │ ├── config │ └── log4j.properties │ ├── java │ └── org │ │ └── springframework │ │ └── samples │ │ └── hadoop │ │ └── pig │ │ ├── PasswordRepository.java │ │ ├── PasswordService.java │ │ ├── PigApp.java │ │ ├── PigAppWithApacheLogs.java │ │ ├── PigAppWithRepository.java │ │ └── PigPasswordRepository.java │ └── resources │ ├── META-INF │ └── spring │ │ ├── pig-context-apache-logs.xml │ │ ├── pig-context-password-repository.xml │ │ └── pig-context.xml │ ├── apache-log-simple.pig │ ├── apache-log.pig │ ├── copy-files.groovy │ ├── core-site.xml │ ├── hadoop.properties │ ├── password-analysis.pig │ ├── pig-analysis.properties │ └── pig-server.properties └── yarn ├── README.md ├── build.gradle ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── yarn ├── README.md ├── batch-amjob ├── README.md └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── springframework │ │ │ └── yarn │ │ │ └── examples │ │ │ └── PrintTasklet.java │ └── 
resources │ │ ├── application-context.xml │ │ └── appmaster-context.xml │ └── test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ └── BatchAmjobTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── BatchAmjobTests-context.xml ├── batch-files ├── README.md └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── springframework │ │ │ └── yarn │ │ │ └── examples │ │ │ ├── LoggingItemWriter.java │ │ │ └── PrintTasklet.java │ └── resources │ │ ├── application-context.xml │ │ ├── appmaster-context.xml │ │ └── container-context.xml │ └── test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ └── BatchFilesTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── BatchFilesTests-context.xml ├── batch-partition ├── README.md └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── springframework │ │ │ └── yarn │ │ │ └── examples │ │ │ └── PrintTasklet.java │ └── resources │ │ ├── application-context.xml │ │ ├── appmaster-context.xml │ │ └── container-context.xml │ └── test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ └── BatchPartitionTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── BatchPartitionTests-context.xml ├── build.gradle ├── custom-amservice ├── README.md └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── springframework │ │ │ └── yarn │ │ │ └── examples │ │ │ ├── CustomAppmaster.java │ │ │ ├── CustomAppmasterService.java │ │ │ ├── CustomContainer.java │ │ │ ├── JobRequest.java │ │ │ └── JobResponse.java │ └── resources │ │ ├── application-context.xml │ │ ├── appmaster-context.xml │ │ └── container-context.xml │ └── test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ └── CustomAmserviceTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── CustomAmserviceTests-context.xml ├── kill-application ├── 
README.md └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── springframework │ │ │ └── yarn │ │ │ └── examples │ │ │ ├── KillApplicationContainer.java │ │ │ └── Main.java │ └── resources │ │ ├── application-context.xml │ │ ├── appmaster-context.xml │ │ └── container-context.xml │ └── test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ └── KillApplicationTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── KillApplicationTests-context.xml ├── list-applications ├── README.md └── src │ ├── main │ ├── java │ │ ├── .gitignore │ │ └── org │ │ │ └── springframework │ │ │ └── yarn │ │ │ └── examples │ │ │ └── Main.java │ └── resources │ │ └── application-context.xml │ └── test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ └── ListApplicationsTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── ListApplicationsTests-context.xml ├── multi-context ├── README.md └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── springframework │ │ │ └── yarn │ │ │ └── examples │ │ │ ├── AppmasterConfiguration.java │ │ │ ├── ClientConfiguration.java │ │ │ ├── ContainerConfiguration.java │ │ │ └── MultiContextContainer.java │ └── resources │ │ ├── application-context.xml │ │ ├── appmaster-context.xml │ │ └── container-context.xml │ └── test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ ├── MultiContextJavaConfigTests.java │ │ ├── MultiContextTests.java │ │ └── MultiContextXmlConfigTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── MultiContextXmlConfigTests-context.xml ├── restart-context ├── README.md └── src │ ├── main │ ├── java │ │ └── org │ │ │ └── springframework │ │ │ └── yarn │ │ │ └── examples │ │ │ ├── CustomAppmaster.java │ │ │ └── FailingContextContainer.java │ └── resources │ │ ├── application-context.xml │ │ ├── appmaster-context.xml │ │ └── container-context.xml │ └── 
test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ └── RestartContextTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── RestartContextTests-context.xml ├── simple-command ├── README.md └── src │ ├── main │ ├── java │ │ └── .gitignore │ └── resources │ │ ├── application-context.xml │ │ └── appmaster-context.xml │ └── test │ ├── java │ └── org │ │ └── springframework │ │ └── yarn │ │ └── examples │ │ └── SimpleCommandTests.java │ └── resources │ └── org │ └── springframework │ └── yarn │ └── examples │ └── SimpleCommandTests-context.xml └── src └── main ├── java └── org │ └── springframework │ └── yarn │ └── examples │ └── CommonMain.java └── resources ├── hadoop.properties └── log4j.properties /.gitignore: -------------------------------------------------------------------------------- 1 | .settings 2 | .project 3 | .classpath 4 | .springBeans 5 | .gradle 6 | .idea 7 | *.iws 8 | *.ipr 9 | *.class 10 | *.lk 11 | *.if 12 | build 13 | vf.* 14 | bin 15 | *.iml 16 | *.7z 17 | *.dmg 18 | *.gz 19 | *.iso 20 | *.rar 21 | *.tar 22 | *.zip 23 | target/ 24 | .DS_Store* 25 | ehthumbs.db 26 | Icon? 27 | Thumbs.db 28 | -------------------------------------------------------------------------------- /README.asciidoc: -------------------------------------------------------------------------------- 1 | == spring-hadoop-samples is no longer actively maintained by VMware, Inc. 2 | 3 | == Sample Applications for Spring for Apache Hadoop 4 | 5 | This repository contains several sample applications that show how you can use 6 | Spring for Apache Hadoop. 7 | 8 | NOTE: These samples are built using version 2.2.0.RELEASE of Spring for Apache Hadoop project. For examples built against older versions check out the Git "tag" that corresponds to your desired version. 9 | 10 | === Overview of Spring for Apache Hadoop 11 | 12 | Hadoop has a poor out of the box programming model. 
Writing applications for Hadoop generally turns into a collection of scripts calling Hadoop command line applications. Spring for Apache Hadoop provides a consistent programming model and declarative configuration model for developing Hadoop applications. 13 | 14 | Together with Spring Integration and Spring Batch, Spring for Apache Hadoop can be used to address a wide range of use cases 15 | 16 | * HDFS data access and scripting 17 | * Data Analysis 18 | ** MapReduce 19 | ** Pig 20 | ** Hive 21 | * Workflow 22 | * Data collection and ingestion 23 | * Event Streams processing 24 | 25 | === Features 26 | 27 | * Declarative configuration to create, configure, and parameterize Hadoop connectivity and all job types (MR/Streaming MR/Pig/Hive/Cascading) 28 | * Simplify HDFS API with added support for JVM scripting languages 29 | * Runner classes for MR/Pig/Hive/Cascading for small workflows consisting of the following steps HDFS operations -> data analysis -> HDFS operations 30 | * Helper “Template” classes for Pig/Hive/HBase 31 | ** Execute scripts and queries without worrying about Resource Management Exception Handling and Translation 32 | ** Thread-safety 33 | * Lightweight Object-Mapping for HBase 34 | * Hadoop components for Spring Integration and Spring Batch 35 | ** Spring Batch tasklets for HDFS and data analysis 36 | ** Spring Batch HDFS ItemWriters 37 | ** Spring Integration HDFS channel adapters 38 | 39 | == Additional Resources 40 | 41 | Many of the samples were taken from the O'Reilly book link:http://shop.oreilly.com/product/0636920024767.do[Spring Data]. Using the book as a companion to the samples is quite helpful to understanding the samples and the full feature set of what can be done using Spring technologies and Hadoop. 
42 | 43 | The main web site for link:http://www.springsource.org/spring-data/hadoop[Spring for Apache Hadoop] 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /boot/boot-fsshell/.gitignore: -------------------------------------------------------------------------------- 1 | .gradle 2 | bin 3 | build 4 | .settings 5 | .classpath 6 | .springBeans 7 | .project 8 | *.iml 9 | *.ipr 10 | *.iws 11 | metastore_db 12 | /samples/pig-scripting/src/main/resources/ml-100k.zip 13 | /samples/pig-scripting/src/main/resources/ml-100k/u.data 14 | /src/test/resources/s3.properties 15 | /.idea/ 16 | .DS_Store 17 | /out/ 18 | target 19 | *.log 20 | -------------------------------------------------------------------------------- /boot/boot-fsshell/README.md: -------------------------------------------------------------------------------- 1 | # What you'll build 2 | 3 | You'll build a simple Spring Hadoop application using Spring Boot 4 | doing a simple listing from hdfs. 5 | 6 | # Build 7 | 8 | With gradle: 9 | ``` 10 | $ ./gradlew clean build 11 | ``` 12 | 13 | With maven: 14 | ``` 15 | $ mvn clean package 16 | ``` 17 | 18 | # Run the application 19 | 20 | With gradle: 21 | ``` 22 | $ java -jar build/libs/boot-fsshell-0.1.0.jar 23 | ``` 24 | 25 | With maven: 26 | ``` 27 | $ java -jar target/boot-fsshell-0.1.0.jar 28 | ``` 29 | 30 | Running the command you should get a listing of files under `/tmp` hdfs directory. 31 | 32 | ``` 33 | > hdfs://localhost:8020/tmp 34 | > hdfs://localhost:8020/tmp/hadoop-yarn 35 | > hdfs://localhost:8020/tmp/hadoop-yarn/staging 36 | > hdfs://localhost:8020/tmp/hadoop-yarn/staging/history 37 | > hdfs://localhost:8020/tmp/hadoop-yarn/staging/history/done 38 | > hdfs://localhost:8020/tmp/hadoop-yarn/staging/history/done_intermediate 39 | ``` 40 | 41 | # How it works 42 | 43 | Spring Hadoop `spring-data-hadoop-boot` package contains Boot auto-configuration 44 | for Hadoop. 
Currently it will automatically configure Hadoop's `Configuration` class 45 | as a bean in Spring application context. `FsShell` is created automatically and 46 | configured with automatically created `Configuration` bean. To use local 47 | hdfs instance, simply place below content to `application.yml` file. 48 | 49 | 50 | ``` 51 | spring: 52 | main: 53 | show_banner: false 54 | hadoop: 55 | fsUri: hdfs://localhost:8020 56 | ``` 57 | 58 | -------------------------------------------------------------------------------- /boot/boot-fsshell/build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | maven { url "http://repo.spring.io/libs-release" } 4 | } 5 | dependencies { 6 | classpath("org.springframework.boot:spring-boot-gradle-plugin:1.2.3.RELEASE") 7 | } 8 | } 9 | 10 | version = '0.1.0' 11 | 12 | apply plugin: 'java' 13 | apply plugin: 'eclipse' 14 | apply plugin: 'idea' 15 | apply plugin: 'spring-boot' 16 | 17 | repositories { 18 | mavenCentral() 19 | maven { url "http://repo.spring.io/libs-release" } 20 | } 21 | 22 | dependencies { 23 | compile("org.springframework.data:spring-data-hadoop-boot:2.2.0.RELEASE") 24 | compile("org.springframework.boot:spring-boot-starter-log4j") 25 | } 26 | 27 | compileJava { 28 | sourceCompatibility=1.7 29 | targetCompatibility=1.7 30 | } 31 | 32 | task wrapper(type: Wrapper) { 33 | gradleVersion = '2.2.1' 34 | } 35 | -------------------------------------------------------------------------------- /boot/boot-fsshell/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spring-attic/spring-hadoop-samples/18e6641b2ae6830753abc9b0e7a3e45ade814e0e/boot/boot-fsshell/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /boot/boot-fsshell/gradle/wrapper/gradle-wrapper.properties: 
-------------------------------------------------------------------------------- 1 | #Fri Jan 16 15:01:20 GMT 2015 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-bin.zip 7 | -------------------------------------------------------------------------------- /boot/boot-fsshell/gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 12 | set DEFAULT_JVM_OPTS= 13 | 14 | set DIRNAME=%~dp0 15 | if "%DIRNAME%" == "" set DIRNAME=. 16 | set APP_BASE_NAME=%~n0 17 | set APP_HOME=%DIRNAME% 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 
45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windowz variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 
84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /boot/boot-fsshell/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | demo 8 | boot-fsshell 9 | 0.1.0 10 | jar 11 | 12 | 13 | org.springframework.boot 14 | spring-boot-starter-parent 15 | 1.2.3.RELEASE 16 | 17 | 18 | 19 | 20 | org.springframework.data 21 | spring-data-hadoop-boot 22 | 2.2.0.RELEASE 23 | 24 | 25 | org.springframework.boot 26 | spring-boot-starter-log4j 27 | 28 | 29 | 30 | 31 | 32 | 33 | maven-compiler-plugin 34 | 2.3.2 35 | 36 | 37 | org.springframework.boot 38 | spring-boot-maven-plugin 39 | 40 | 41 | 42 | 43 | 44 | 45 | spring-release 46 | http://repo.spring.io/libs-release 47 | false 48 | 49 | 50 | 51 | 52 | 53 | spring-release 54 | http://repo.spring.io/libs-release 55 | false 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /boot/boot-fsshell/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'boot-fsshell' 2 | -------------------------------------------------------------------------------- /boot/boot-fsshell/src/main/java/demo/DemoApplication.java: -------------------------------------------------------------------------------- 1 | package demo; 2 | 3 | import org.apache.hadoop.fs.FileStatus; 4 | import org.springframework.beans.factory.annotation.Autowired; 5 | import org.springframework.boot.CommandLineRunner; 6 | import org.springframework.boot.SpringApplication; 7 | import org.springframework.boot.autoconfigure.SpringBootApplication; 8 | import org.springframework.data.hadoop.fs.FsShell; 9 | 10 | @SpringBootApplication 11 | public class DemoApplication implements CommandLineRunner { 12 | 13 | @Autowired 14 | private FsShell shell; 
15 | 16 | @Override 17 | public void run(String... args) { 18 | for (FileStatus s : shell.lsr("/tmp")) { 19 | System.out.println("> " + s.getPath()); 20 | } 21 | } 22 | 23 | public static void main(String[] args) { 24 | SpringApplication.run(DemoApplication.class, args); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /boot/boot-fsshell/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | spring: 2 | main: 3 | show_banner: false 4 | hadoop: 5 | fsUri: hdfs://localhost:8020 6 | 7 | -------------------------------------------------------------------------------- /boot/boot-fsshell/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootCategory=WARN, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d %p [%C{1}] - %m%n 6 | 7 | log4j.category.org.apache.hadoop=OFF 8 | 9 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/.gitignore: -------------------------------------------------------------------------------- 1 | # Operating System Files 2 | 3 | *.DS_Store 4 | Thumbs.db 5 | *.sw? 
6 | .#* 7 | *# 8 | *~ 9 | *.sublime-* 10 | 11 | # Build Artifacts 12 | 13 | .gradle/ 14 | build/ 15 | target/ 16 | bin/ 17 | dependency-reduced-pom.xml 18 | 19 | # Eclipse Project Files 20 | 21 | .classpath 22 | .project 23 | .settings/ 24 | 25 | # IntelliJ IDEA Files 26 | 27 | *.iml 28 | *.ipr 29 | *.iws 30 | *.idea 31 | 32 | README.html 33 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/LICENSE.code.txt: -------------------------------------------------------------------------------- 1 | All code in this repository is: 2 | ======================================================================= 3 | Copyright (c) 2013 GoPivotal, Inc. All Rights Reserved 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | 17 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/LICENSE.writing.txt: -------------------------------------------------------------------------------- 1 | Except where otherwise noted, this work is licensed under http://creativecommons.org/licenses/by-nd/3.0/ 2 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/README.md: -------------------------------------------------------------------------------- 1 | # What you'll build 2 | 3 | You'll build a simple Hadoop YARN application with Spring Hadoop and Spring Boot. 4 | 5 | # Build 6 | 7 | Simple run a gradle build command. 
8 | 9 | ```text 10 | $ ./gradlew clean build 11 | ``` 12 | 13 | # Run the application 14 | 15 | Now that you've successfully compiled and packaged your application, it's time to do the fun part and execute it on a Hadoop YARN. 16 | 17 | Below listing shows files after a succesfull gradle build. 18 | 19 | ```text 20 | $ ls -lt build/libs/ 21 | -rw-r--r-- 1 hadoop hadoop 35975001 Feb 2 17:39 yarn-boot-simple-container-0.1.0.jar 22 | -rw-r--r-- 1 hadoop hadoop 35973937 Feb 2 17:39 yarn-boot-simple-client-0.1.0.jar 23 | -rw-r--r-- 1 hadoop hadoop 35973840 Feb 2 17:39 yarn-boot-simple-appmaster-0.1.0.jar 24 | ``` 25 | 26 | Simply run your executable client jar. 27 | 28 | ```text 29 | $ java -jar build/libs/yarn-boot-simple-client-0.1.0.jar 30 | ``` 31 | 32 | Using a Resource Manager UI you can see status of an application. 33 | 34 | ![Resource Manager UI](https://raw.github.com/spring-projects/spring-hadoop-samples/master/boot/yarn-boot-simple/rm-ui.png) 35 | 36 | To find Hadoop's application logs, do a little find within a configured userlogs directory. 37 | 38 | ```text 39 | $ find hadoop/logs/userlogs/|grep std 40 | hadoop/logs/userlogs/application_1391506550167_0001/container_1391506550167_0001_01_000002/Container.stdout 41 | hadoop/logs/userlogs/application_1391506550167_0001/container_1391506550167_0001_01_000002/Container.stderr 42 | hadoop/logs/userlogs/application_1391506550167_0001/container_1391506550167_0001_01_000001/Appmaster.stdout 43 | hadoop/logs/userlogs/application_1391506550167_0001/container_1391506550167_0001_01_000001/Appmaster.stderr 44 | ``` 45 | 46 | Grep logging output from a `HelloPojo` class. 
47 | 48 | ```text 49 | $ grep HelloPojo hadoop/logs/userlogs/application_1391506550167_0001/container_1391506550167_0001_01_000002/Container.stdout 50 | [2014-02-02 17:40:38,314] boot - 11944 INFO [main] --- HelloPojo: Hello from HelloPojo 51 | [2014-02-02 17:40:38,315] boot - 11944 INFO [main] --- HelloPojo: About to list from hdfs root content 52 | [2014-02-02 17:40:41,134] boot - 11944 INFO [main] --- HelloPojo: FileStatus{path=hdfs://localhost:8020/; isDirectory=true; modification_time=1390823919636; access_time=0; owner=root; group=supergroup; permission=rwxr-xr-x; isSymlink=false} 53 | [2014-02-02 17:40:41,135] boot - 11944 INFO [main] --- HelloPojo: FileStatus{path=hdfs://localhost:8020/app; isDirectory=true; modification_time=1391203430490; access_time=0; owner=jvalkealahti; group=supergroup; permission=rwxr-xr-x; isSymlink=false} 54 | ``` 55 | 56 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | maven { url "http://repo.spring.io/libs-release" } 4 | } 5 | dependencies { 6 | classpath("org.springframework.boot:spring-boot-gradle-plugin:1.2.3.RELEASE") 7 | } 8 | } 9 | 10 | version = '0.1.0' 11 | 12 | apply plugin: 'java' 13 | apply plugin: 'eclipse' 14 | apply plugin: 'idea' 15 | apply plugin: 'spring-boot' 16 | 17 | repositories { 18 | mavenCentral() 19 | maven { url "http://repo.spring.io/libs-release" } 20 | } 21 | 22 | dependencies { 23 | compile("org.springframework.data:spring-yarn-boot:2.2.0.RELEASE") 24 | } 25 | 26 | compileJava { 27 | sourceCompatibility=1.7 28 | targetCompatibility=1.7 29 | } 30 | 31 | task clientJar(type: Jar) { 32 | appendix = 'client' 33 | baseName = 'yarn-boot-simple' 34 | from sourceSets.main.output 35 | exclude('**/appmaster/*') 36 | exclude('**/container/*') 37 | } 38 | 39 | task appmasterJar(type: Jar) { 40 | appendix = 'appmaster' 41 
| baseName = 'yarn-boot-simple' 42 | from sourceSets.main.output 43 | exclude('**/client/*') 44 | exclude('**/container/*') 45 | } 46 | 47 | task containerJar(type: Jar) { 48 | appendix = 'container' 49 | baseName = 'yarn-boot-simple' 50 | from sourceSets.main.output 51 | exclude('**/appmaster/*') 52 | exclude('**/client/*') 53 | } 54 | 55 | task clientBoot(type: BootRepackage, dependsOn: clientJar) { 56 | withJarTask = clientJar 57 | mainClass = 'hello.client.ClientApplication' 58 | } 59 | 60 | task appmasterBoot(type: BootRepackage, dependsOn: appmasterJar) { 61 | withJarTask = appmasterJar 62 | mainClass = 'hello.appmaster.AppmasterApplication' 63 | } 64 | 65 | task containerBoot(type: BootRepackage, dependsOn: containerJar) { 66 | withJarTask = containerJar 67 | mainClass = 'hello.container.ContainerApplication' 68 | } 69 | 70 | jar.enabled = false 71 | bootRepackage.enabled = false 72 | 73 | task bootJars 74 | bootJars.dependsOn = [clientBoot, appmasterBoot, containerBoot] 75 | 76 | build.dependsOn(bootJars) 77 | 78 | task wrapper(type: Wrapper) { 79 | gradleVersion = '2.2.1' 80 | } 81 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spring-attic/spring-hadoop-samples/18e6641b2ae6830753abc9b0e7a3e45ade814e0e/boot/yarn-boot-simple/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /boot/yarn-boot-simple/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Thu Jul 30 16:16:27 EDT 2015 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=http\://services.gradle.org/distributions/gradle-2.2.1-bin.zip 7 | 
-------------------------------------------------------------------------------- /boot/yarn-boot-simple/gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 12 | set DEFAULT_JVM_OPTS= 13 | 14 | set DIRNAME=%~dp0 15 | if "%DIRNAME%" == "" set DIRNAME=. 16 | set APP_BASE_NAME=%~n0 17 | set APP_HOME=%DIRNAME% 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windowz variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 
56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/rm-ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spring-attic/spring-hadoop-samples/18e6641b2ae6830753abc9b0e7a3e45ade814e0e/boot/yarn-boot-simple/rm-ui.png -------------------------------------------------------------------------------- /boot/yarn-boot-simple/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'yarn-boot-simple' 2 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/src/main/java/hello/appmaster/AppmasterApplication.java: -------------------------------------------------------------------------------- 1 | package hello.appmaster; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.EnableAutoConfiguration; 5 | 6 
| @EnableAutoConfiguration 7 | public class AppmasterApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(AppmasterApplication.class, args); 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/src/main/java/hello/client/ClientApplication.java: -------------------------------------------------------------------------------- 1 | package hello.client; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.EnableAutoConfiguration; 5 | import org.springframework.yarn.client.YarnClient; 6 | 7 | @EnableAutoConfiguration 8 | public class ClientApplication { 9 | 10 | public static void main(String[] args) { 11 | SpringApplication.run(ClientApplication.class, args) 12 | .getBean(YarnClient.class) 13 | .submitApplication(); 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/src/main/java/hello/container/ContainerApplication.java: -------------------------------------------------------------------------------- 1 | package hello.container; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.EnableAutoConfiguration; 5 | import org.springframework.context.annotation.Bean; 6 | import org.springframework.context.annotation.Configuration; 7 | 8 | @Configuration 9 | @EnableAutoConfiguration 10 | public class ContainerApplication { 11 | 12 | public static void main(String[] args) { 13 | SpringApplication.run(ContainerApplication.class, args); 14 | } 15 | 16 | @Bean 17 | public HelloPojo helloPojo() { 18 | return new HelloPojo(); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/src/main/java/hello/container/HelloPojo.java: -------------------------------------------------------------------------------- 1 | 
package hello.container; 2 | 3 | import org.apache.commons.logging.Log; 4 | import org.apache.commons.logging.LogFactory; 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.fs.FileStatus; 7 | import org.springframework.beans.factory.annotation.Autowired; 8 | import org.springframework.data.hadoop.fs.FsShell; 9 | import org.springframework.yarn.annotation.OnContainerStart; 10 | import org.springframework.yarn.annotation.YarnComponent; 11 | 12 | @YarnComponent 13 | public class HelloPojo { 14 | 15 | private static final Log log = LogFactory.getLog(HelloPojo.class); 16 | 17 | @Autowired 18 | private Configuration configuration; 19 | 20 | @OnContainerStart 21 | public void publicVoidNoArgsMethod() { 22 | log.info("Hello from HelloPojo"); 23 | log.info("About to list from hdfs root content"); 24 | 25 | @SuppressWarnings("resource") 26 | FsShell shell = new FsShell(configuration); 27 | for (FileStatus s : shell.ls(false, "/")) { 28 | log.info(s); 29 | } 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /boot/yarn-boot-simple/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | spring: 2 | hadoop: 3 | fsUri: hdfs://localhost:8020 4 | resourceManagerHost: localhost 5 | yarn: 6 | appName: yarn-boot-simple 7 | applicationDir: /app/yarn-boot-simple/ 8 | client: 9 | files: 10 | - "file:build/libs/yarn-boot-simple-container-0.1.0.jar" 11 | - "file:build/libs/yarn-boot-simple-appmaster-0.1.0.jar" 12 | launchcontext: 13 | archiveFile: yarn-boot-simple-appmaster-0.1.0.jar 14 | appmaster: 15 | containerCount: 1 16 | launchcontext: 17 | archiveFile: yarn-boot-simple-container-0.1.0.jar -------------------------------------------------------------------------------- /boot/yarn-store-groups/appmaster/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | org.springframework 
9 | appmaster 10 | 0.1.0 11 | 12 | 13 | org.springframework 14 | yarn-store-groups 15 | 0.1.0 16 | 17 | 18 | 19 | 20 | org.springframework.data 21 | spring-yarn-boot 22 | 2.2.0.RELEASE 23 | 24 | 25 | javax.servlet 26 | servlet-api 27 | 28 | 29 | 30 | 31 | org.springframework.boot 32 | spring-boot-starter-web 33 | 1.2.3.RELEASE 34 | 35 | 36 | ch.qos.logback 37 | * 38 | 39 | 40 | 41 | 42 | org.springframework.boot 43 | spring-boot-starter-actuator 44 | 1.2.3.RELEASE 45 | 46 | 47 | javax.servlet 48 | servlet-api 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | org.springframework.boot 58 | spring-boot-maven-plugin 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/appmaster/src/main/java/hello/appmaster/AppmasterApplication.java: -------------------------------------------------------------------------------- 1 | package hello.appmaster; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class AppmasterApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(AppmasterApplication.class, args); 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/appmaster/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 0 3 | endpoints: 4 | shutdown: 5 | enabled: true 6 | spring: 7 | hadoop: 8 | fsUri: hdfs://localhost:8020 9 | resourceManagerHost: localhost 10 | yarn: 11 | appType: BOOT 12 | appName: yarn-store-groups 13 | applicationBaseDir: /app/ 14 | appmaster: 15 | appmasterClass: org.springframework.yarn.am.cluster.ManagedContainerClusterAppmaster 16 | keepContextAlive: true 17 | containercluster: 18 | enabled: true 19 | clusters: 20 | store: 21 | projection: 22 | type: default 23 | 
data: 24 | any: 1 25 | resource: 26 | priority: 10 27 | memory: 64 28 | virtualCores: 1 29 | launchcontext: 30 | locality: false 31 | archiveFile: container-0.1.0.jar 32 | endpoints: 33 | containercluster: 34 | enabled: true 35 | containerregister: 36 | enabled: true 37 | 38 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/appmaster/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootCategory=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d %p [%C{1}] - %m%n 6 | 7 | log4j.category.org.springframework.yarn=DEBUG 8 | log4j.category.org.springframework.boot=INFO 9 | log4j.category.hello.appmaster=INFO 10 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | repositories { 3 | maven { url "http://repo.spring.io/libs-release" } 4 | } 5 | dependencies { 6 | classpath("org.springframework.boot:spring-boot-gradle-plugin:1.2.3.RELEASE") 7 | } 8 | } 9 | 10 | allprojects { 11 | apply plugin: 'base' 12 | } 13 | 14 | subprojects { subproject -> 15 | apply plugin: 'java' 16 | apply plugin: 'eclipse' 17 | apply plugin: 'idea' 18 | version = '0.1.0' 19 | repositories { 20 | mavenCentral() 21 | maven { url "http://repo.spring.io/libs-release" } 22 | } 23 | task copyJars(type: Copy) { 24 | from "$buildDir/libs" 25 | into "$rootDir/dist/target/dist/" 26 | include "**/*.jar" 27 | } 28 | assemble.doLast {copyJars.execute()} 29 | 30 | configurations { 31 | all*.exclude group: 'ch.qos.logback' 32 | all*.exclude group: 'javax.servlet', module: 'servlet-api', version: '2.5' 33 | } 34 | 35 | } 36 | 37 | project('client') { 38 | apply plugin: 
'spring-boot' 39 | dependencies { 40 | compile("org.springframework.data:spring-yarn-boot-cli:2.2.0.RELEASE") 41 | } 42 | } 43 | 44 | project('appmaster') { 45 | apply plugin: 'spring-boot' 46 | dependencies { 47 | compile("org.springframework.data:spring-yarn-boot:2.2.0.RELEASE") 48 | compile("org.springframework.boot:spring-boot-starter-web:1.2.3.RELEASE") 49 | compile("org.springframework.boot:spring-boot-starter-actuator:1.2.3.RELEASE") 50 | } 51 | } 52 | 53 | project('container') { 54 | apply plugin: 'spring-boot' 55 | dependencies { 56 | compile("org.springframework.data:spring-yarn-boot:2.2.0.RELEASE") 57 | compile("org.springframework.data:spring-data-hadoop-store:2.2.0.RELEASE") 58 | compile("org.springframework.boot:spring-boot-starter-web:1.2.3.RELEASE") 59 | compile("org.springframework.boot:spring-boot-starter-actuator:1.2.3.RELEASE") 60 | } 61 | } 62 | 63 | project('dist') { 64 | dependencies { 65 | compile project(":client") 66 | compile project(":appmaster") 67 | compile project(":container") 68 | } 69 | clean.doLast {ant.delete(dir: "target")} 70 | jar.enabled = false 71 | } 72 | 73 | task wrapper(type: Wrapper) { 74 | gradleVersion = '1.11' 75 | } 76 | 77 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/client/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | org.springframework 9 | client 10 | 0.1.0 11 | 12 | 13 | org.springframework 14 | yarn-store-groups 15 | 0.1.0 16 | 17 | 18 | 19 | 20 | org.springframework.data 21 | spring-yarn-boot-cli 22 | 2.2.0.RELEASE 23 | 24 | 25 | 26 | 27 | 28 | 29 | org.springframework.boot 30 | spring-boot-maven-plugin 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/client/src/main/java/hello/client/ClientApplication.java: -------------------------------------------------------------------------------- 1 | 
package hello.client; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.springframework.boot.cli.command.Command; 7 | import org.springframework.yarn.boot.cli.AbstractCli; 8 | import org.springframework.yarn.boot.cli.YarnClusterCreateCommand; 9 | import org.springframework.yarn.boot.cli.YarnClusterDestroyCommand; 10 | import org.springframework.yarn.boot.cli.YarnClusterInfoCommand; 11 | import org.springframework.yarn.boot.cli.YarnClusterModifyCommand; 12 | import org.springframework.yarn.boot.cli.YarnClusterStartCommand; 13 | import org.springframework.yarn.boot.cli.YarnClusterStopCommand; 14 | import org.springframework.yarn.boot.cli.YarnClustersInfoCommand; 15 | import org.springframework.yarn.boot.cli.YarnKillCommand; 16 | import org.springframework.yarn.boot.cli.YarnPushCommand; 17 | import org.springframework.yarn.boot.cli.YarnPushedCommand; 18 | import org.springframework.yarn.boot.cli.YarnShutdownCommand; 19 | import org.springframework.yarn.boot.cli.YarnSubmitCommand; 20 | import org.springframework.yarn.boot.cli.YarnSubmittedCommand; 21 | import org.springframework.yarn.boot.cli.shell.ShellCommand; 22 | 23 | public class ClientApplication extends AbstractCli { 24 | 25 | public static void main(String... 
args) { 26 | List commands = new ArrayList(); 27 | commands.add(new YarnPushCommand()); 28 | commands.add(new YarnPushedCommand()); 29 | commands.add(new YarnSubmitCommand()); 30 | commands.add(new YarnSubmittedCommand()); 31 | commands.add(new YarnKillCommand()); 32 | commands.add(new YarnShutdownCommand()); 33 | commands.add(new YarnClustersInfoCommand()); 34 | commands.add(new YarnClusterInfoCommand()); 35 | commands.add(new YarnClusterCreateCommand()); 36 | commands.add(new YarnClusterStartCommand()); 37 | commands.add(new YarnClusterStopCommand()); 38 | commands.add(new YarnClusterModifyCommand()); 39 | commands.add(new YarnClusterDestroyCommand()); 40 | ClientApplication app = new ClientApplication(); 41 | app.registerCommands(commands); 42 | app.registerCommand(new ShellCommand(commands)); 43 | app.doMain(args); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/client/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | spring: 2 | main: 3 | show_banner: false 4 | hadoop: 5 | fsUri: hdfs://localhost:8020 6 | resourceManagerHost: localhost 7 | yarn: 8 | appType: BOOT 9 | appName: yarn-store-groups 10 | applicationBaseDir: /app/ 11 | client: 12 | clientClass: org.springframework.yarn.client.DefaultApplicationYarnClient 13 | files: 14 | - "file:dist/target/dist/appmaster-0.1.0.jar" 15 | - "file:dist/target/dist/container-0.1.0.jar" 16 | launchcontext: 17 | archiveFile: appmaster-0.1.0.jar 18 | resource: 19 | memory: 1g 20 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/client/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootCategory=WARN, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | 
log4j.appender.stdout.layout.ConversionPattern=%d %p [%C{1}] - %m%n 6 | 7 | log4j.category.org.apache.hadoop=OFF 8 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/container/src/main/resources/application.yml: -------------------------------------------------------------------------------- 1 | server: 2 | port: 0 3 | endpoints: 4 | shutdown: 5 | enabled: true 6 | spring: 7 | hadoop: 8 | fsUri: hdfs://localhost:8020 9 | resourceManagerHost: localhost 10 | 11 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/container/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootCategory=INFO, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d %p [%C{1}] [%t] - %m%n 6 | 7 | log4j.category.org.springframework.context=DEBUG 8 | log4j.category.org.springframework.beans=DEBUG 9 | log4j.category.org.springframework.yarn=DEBUG 10 | log4j.category.org.springframework.data.hadoop.store=INFO 11 | log4j.category.org.springframework.boot=DEBUG 12 | log4j.category.hello.container=INFO 13 | 14 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/dist/assembly.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | dist 6 | 7 | dir 8 | 9 | false 10 | 11 | 12 | true 13 | 14 | org.springframework:client 15 | org.springframework:appmaster 16 | org.springframework:container 17 | 18 | 19 | false 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/dist/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | 8 | org.springframework 9 | dist 
10 | 0.1.0 11 | 12 | 13 | org.springframework 14 | yarn-store-groups 15 | 0.1.0 16 | 17 | 18 | 19 | 20 | org.springframework 21 | client 22 | 0.1.0 23 | 24 | 25 | org.springframework 26 | appmaster 27 | 0.1.0 28 | 29 | 30 | org.springframework 31 | container 32 | 0.1.0 33 | 34 | 35 | 36 | 37 | 38 | 39 | maven-assembly-plugin 40 | 41 | 42 | distro-assembly 43 | package 44 | 45 | single 46 | 47 | 48 | ${project.name} 49 | false 50 | 51 | 52 | 53 | 54 | 55 | maven-failsafe-plugin 56 | 57 | 58 | package 59 | 60 | integration-test 61 | verify 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spring-attic/spring-hadoop-samples/18e6641b2ae6830753abc9b0e7a3e45ade814e0e/boot/yarn-store-groups/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /boot/yarn-store-groups/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Fri Feb 14 10:46:03 GMT 2014 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=http\://services.gradle.org/distributions/gradle-1.11-bin.zip 7 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | 
if "%OS%"=="Windows_NT" setlocal 10 | 11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 12 | set DEFAULT_JVM_OPTS= 13 | 14 | set DIRNAME=%~dp0 15 | if "%DIRNAME%" == "" set DIRNAME=. 16 | set APP_BASE_NAME=%~n0 17 | set APP_HOME=%DIRNAME% 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windowz variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 
56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /boot/yarn-store-groups/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.springframework 8 | yarn-store-groups 9 | 0.1.0 10 | pom 11 | 12 | 13 | org.springframework.boot 14 | spring-boot-starter-parent 15 | 1.2.3.RELEASE 16 | 17 | 18 | 19 | container 20 | appmaster 21 | client 22 | dist 23 | 24 | 25 | 26 | 27 | org.springframework.data 28 | spring-yarn-boot 29 | 2.2.0.RELEASE 30 | 31 | 32 | 33 | 34 | 35 | 36 | maven-compiler-plugin 37 | 2.3.2 38 | 39 | 40 | maven-assembly-plugin 41 | 2.4 42 | 43 | 44 | assembly.xml 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | spring-release 54 | http://repo.spring.io/libs-release 55 | false 56 | 57 | 58 | 59 | 60 | 61 | spring-release 62 | http://repo.spring.io/libs-release 63 | false 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- 
/boot/yarn-store-groups/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'yarn-store-groups' 2 | include 'client','appmaster','container','dist' 3 | -------------------------------------------------------------------------------- /dataset/.gitignore: -------------------------------------------------------------------------------- 1 | /.idea/ 2 | *.ipr 3 | *.iws 4 | -------------------------------------------------------------------------------- /dataset/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | org.springframework.samples 6 | spring-hadoop-samples-dataset 7 | 1.0.0.BUILD-SNAPSHOT 8 | 9 | Spring Hadoop Samples - Dataset 10 | 11 | 12 | org.springframework.boot 13 | spring-boot-starter-parent 14 | 1.1.0.RELEASE 15 | 16 | 17 | 18 | org.springframework.samples.hadoop.dataset.MyFilesApp 19 | 1.7 20 | 21 | 22 | 23 | 24 | org.springframework.boot 25 | spring-boot-starter 26 | 27 | 28 | org.springframework.data 29 | spring-data-hadoop 30 | 2.0.0.RELEASE 31 | 32 | 33 | org.springframework.data 34 | spring-data-hadoop-store 35 | 2.0.0.RELEASE 36 | 37 | 38 | org.xerial.snappy 39 | snappy-java 40 | 1.1.0 41 | runtime 42 | 43 | 44 | 45 | 46 | spring-milestones 47 | http://repo.spring.io/libs-release 48 | 49 | 50 | 51 | 52 | 53 | 54 | org.springframework.boot 55 | spring-boot-maven-plugin 56 | 57 | 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /dataset/src/main/java/org/springframework/samples/hadoop/dataset/DatasetConfig.java: -------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.dataset; 2 | 3 | import org.kitesdk.data.Formats; 4 | import org.springframework.beans.factory.annotation.Autowired; 5 | import org.springframework.context.annotation.Bean; 6 | import org.springframework.context.annotation.Configuration; 
7 | import org.springframework.context.annotation.ImportResource; 8 | import org.springframework.data.hadoop.store.DataStoreWriter; 9 | import org.springframework.data.hadoop.store.dataset.*; 10 | 11 | import java.util.Arrays; 12 | 13 | @Configuration 14 | @ImportResource("hadoop-context.xml") 15 | public class DatasetConfig { 16 | 17 | private @Autowired org.apache.hadoop.conf.Configuration hadoopConfiguration; 18 | 19 | @Bean 20 | public DatasetRepositoryFactory datasetRepositoryFactory() { 21 | DatasetRepositoryFactory datasetRepositoryFactory = new DatasetRepositoryFactory(); 22 | datasetRepositoryFactory.setConf(hadoopConfiguration); 23 | datasetRepositoryFactory.setBasePath("/user/spring"); 24 | return datasetRepositoryFactory; 25 | } 26 | 27 | @Bean 28 | public DataStoreWriter dataStoreWriter() { 29 | return new AvroPojoDatasetStoreWriter(FileInfo.class, datasetRepositoryFactory(), fileInfoDatasetDefinition()); 30 | } 31 | 32 | @Bean 33 | public DatasetOperations datasetOperations() { 34 | DatasetTemplate datasetOperations = new DatasetTemplate(); 35 | datasetOperations.setDatasetDefinitions(Arrays.asList(fileInfoDatasetDefinition())); 36 | datasetOperations.setDatasetRepositoryFactory(datasetRepositoryFactory()); 37 | return datasetOperations; 38 | } 39 | 40 | @Bean 41 | public DatasetDefinition fileInfoDatasetDefinition() { 42 | DatasetDefinition definition = new DatasetDefinition(); 43 | definition.setFormat(Formats.AVRO.getName()); 44 | definition.setTargetClass(FileInfo.class); 45 | definition.setAllowNullValues(false); 46 | return definition; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /dataset/src/main/java/org/springframework/samples/hadoop/dataset/FileInfo.java: -------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.dataset; 2 | 3 | import org.apache.avro.reflect.Nullable; 4 | 5 | public class FileInfo { 6 | private String 
name; 7 | private @Nullable String path; 8 | private long size; 9 | private long modified; 10 | 11 | public FileInfo(String name, String path, long size, long modified) { 12 | this.name = name; 13 | this.path = path; 14 | this.size = size; 15 | this.modified = modified; 16 | } 17 | 18 | public FileInfo() { 19 | } 20 | 21 | public String getName() { 22 | return name; 23 | } 24 | 25 | public String getPath() { 26 | return path; 27 | } 28 | 29 | public long getSize() { 30 | return size; 31 | } 32 | 33 | public long getModified() { 34 | return modified; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /dataset/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.hadoop.fsUri=hdfs://localhost:8020 2 | -------------------------------------------------------------------------------- /dataset/src/main/resources/hadoop-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | fs.defaultFS=${spring.hadoop.fsUri} 10 | 11 | 12 | -------------------------------------------------------------------------------- /dataset/src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /hbase/README.asciidoc: -------------------------------------------------------------------------------- 1 | == Hbase example 2 | 3 | A HbaseTemplate-based example of a UserRepository used for adding and accessing User records. 
4 | 5 | === Building and running 6 | 7 | Use the following commands to build and run the sample 8 | 9 | $ mvn clean package 10 | $ sh ./target/appassembler/bin/usercount 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /hbase/src/main/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # Direct log messages to stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target=System.out 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %40.40c:%4L - %m%n 6 | 7 | # Root logger option 8 | log4j.rootLogger=INFO, stdout 9 | 10 | #log4j.logger.org.springframework.data.hadoop=DEBUG 11 | -------------------------------------------------------------------------------- /hbase/src/main/java/org/springframework/samples/hadoop/hbase/User.java: -------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.hbase; 2 | 3 | public class User { 4 | 5 | private String name; 6 | private String email; 7 | private String password; 8 | 9 | public User(String name, String email, String password) { 10 | super(); 11 | this.name = name; 12 | this.email = email; 13 | this.password = password; 14 | } 15 | 16 | public String getName() { 17 | return name; 18 | } 19 | 20 | public String getEmail() { 21 | return email; 22 | } 23 | 24 | public String getPassword() { 25 | return password; 26 | } 27 | 28 | @Override 29 | public String toString() { 30 | return "User [name=" + name + ", email=" + email + ", password=" 31 | + password + "]"; 32 | } 33 | 34 | 35 | } 36 | -------------------------------------------------------------------------------- /hbase/src/main/java/org/springframework/samples/hadoop/hbase/UserApp.java: -------------------------------------------------------------------------------- 1 | /* 2 | 
* Copyright 2011-2012 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.samples.hadoop.hbase; 17 | 18 | import java.util.List; 19 | 20 | import org.apache.commons.logging.Log; 21 | import org.apache.commons.logging.LogFactory; 22 | import org.springframework.context.support.AbstractApplicationContext; 23 | import org.springframework.context.support.ClassPathXmlApplicationContext; 24 | 25 | public class UserApp { 26 | 27 | private static final Log log = LogFactory.getLog(UserApp.class); 28 | 29 | public static void main(String[] args) throws Exception { 30 | AbstractApplicationContext context = new ClassPathXmlApplicationContext( 31 | "/META-INF/spring/application-context.xml", UserApp.class); 32 | log.info("HBase Application Running"); 33 | context.registerShutdownHook(); 34 | 35 | UserUtils userUtils = context.getBean(UserUtils.class); 36 | userUtils.initialize(); 37 | userUtils.addUsers(); 38 | 39 | UserRepository userRepository = context.getBean(UserRepository.class); 40 | List users = userRepository.findAll(); 41 | System.out.println("Number of users = " + users.size()); 42 | System.out.println(users); 43 | 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /hbase/src/main/java/org/springframework/samples/hadoop/hbase/UserRepository.java: 
-------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.hbase; 2 | 3 | import java.util.List; 4 | 5 | import org.apache.hadoop.hbase.client.HTableInterface; 6 | import org.apache.hadoop.hbase.client.Put; 7 | import org.apache.hadoop.hbase.client.Result; 8 | import org.apache.hadoop.hbase.util.Bytes; 9 | import org.springframework.beans.factory.annotation.Autowired; 10 | import org.springframework.data.hadoop.hbase.HbaseTemplate; 11 | import org.springframework.data.hadoop.hbase.RowMapper; 12 | import org.springframework.data.hadoop.hbase.TableCallback; 13 | import org.springframework.stereotype.Repository; 14 | 15 | @Repository 16 | public class UserRepository { 17 | 18 | @Autowired 19 | private HbaseTemplate hbaseTemplate; 20 | 21 | private String tableName = "users"; 22 | 23 | public static byte[] CF_INFO = Bytes.toBytes("cfInfo"); 24 | 25 | private byte[] qUser = Bytes.toBytes("user"); 26 | private byte[] qEmail = Bytes.toBytes("email"); 27 | private byte[] qPassword = Bytes.toBytes("password"); 28 | 29 | public List findAll() { 30 | return hbaseTemplate.find(tableName, "cfInfo", new RowMapper() { 31 | @Override 32 | public User mapRow(Result result, int rowNum) throws Exception { 33 | return new User(Bytes.toString(result.getValue(CF_INFO, qUser)), 34 | Bytes.toString(result.getValue(CF_INFO, qEmail)), 35 | Bytes.toString(result.getValue(CF_INFO, qPassword))); 36 | } 37 | }); 38 | 39 | } 40 | 41 | public User save(final String userName, final String email, 42 | final String password) { 43 | return hbaseTemplate.execute(tableName, new TableCallback() { 44 | public User doInTable(HTableInterface table) throws Throwable { 45 | User user = new User(userName, email, password); 46 | Put p = new Put(Bytes.toBytes(user.getName())); 47 | p.add(CF_INFO, qUser, Bytes.toBytes(user.getName())); 48 | p.add(CF_INFO, qEmail, Bytes.toBytes(user.getEmail())); 49 | p.add(CF_INFO, qPassword, 
Bytes.toBytes(user.getPassword())); 50 | table.put(p); 51 | return user; 52 | 53 | } 54 | }); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /hbase/src/main/java/org/springframework/samples/hadoop/hbase/UserUtils.java: -------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.hbase; 2 | 3 | import java.io.IOException; 4 | 5 | import javax.annotation.Resource; 6 | 7 | import org.apache.hadoop.conf.Configuration; 8 | import org.apache.hadoop.hbase.HColumnDescriptor; 9 | import org.apache.hadoop.hbase.HTableDescriptor; 10 | import org.apache.hadoop.hbase.client.HBaseAdmin; 11 | import org.apache.hadoop.hbase.client.HTable; 12 | import org.apache.hadoop.hbase.client.Put; 13 | import org.apache.hadoop.hbase.util.Bytes; 14 | import org.springframework.beans.factory.InitializingBean; 15 | import org.springframework.beans.factory.annotation.Autowired; 16 | import org.springframework.data.hadoop.hbase.HbaseTemplate; 17 | import org.springframework.data.hadoop.hbase.TableCallback; 18 | import org.springframework.stereotype.Component; 19 | import org.springframework.samples.hadoop.hbase.*; 20 | 21 | @Component 22 | public class UserUtils implements InitializingBean { 23 | 24 | private String tableName = "users"; 25 | private byte[] tableNameAsBytes = Bytes.toBytes("users"); 26 | 27 | @Resource(name = "hbaseConfiguration") 28 | private Configuration config; 29 | 30 | @Autowired 31 | private HbaseTemplate hbaseTemplate; 32 | 33 | @Autowired 34 | private UserRepository userRepository; 35 | 36 | private HBaseAdmin admin; 37 | 38 | public void initialize() throws IOException { 39 | 40 | if (admin.tableExists(tableNameAsBytes)) { 41 | if (!admin.isTableDisabled(tableNameAsBytes)) { 42 | System.out.printf("Disabling %s\n", tableName); 43 | admin.disableTable(tableNameAsBytes); 44 | } 45 | System.out.printf("Deleting %s\n", tableName); 46 | 
admin.deleteTable(tableNameAsBytes); 47 | } 48 | 49 | HTableDescriptor tableDescriptor = new HTableDescriptor(tableName); 50 | HColumnDescriptor columnDescriptor = new HColumnDescriptor( 51 | UserRepository.CF_INFO); 52 | tableDescriptor.addFamily(columnDescriptor); 53 | 54 | admin.createTable(tableDescriptor); 55 | 56 | } 57 | 58 | public void addUsers() { 59 | for (int i = 0; i < 10; i++) { 60 | userRepository.save("user" + i,"user" + i + "@yahoo.com", "password" + i); 61 | } 62 | } 63 | 64 | @Override 65 | public void afterPropertiesSet() throws Exception { 66 | admin = new HBaseAdmin(config); 67 | } 68 | 69 | } 70 | 71 | -------------------------------------------------------------------------------- /hbase/src/main/resources/META-INF/spring/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | fs.defaultFS=hdfs://localhost:8020 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /hbase/src/main/resources/hbase.properties: -------------------------------------------------------------------------------- 1 | hbase.zk.host=localhost 2 | hbase.zk.port=2181 3 | 4 | -------------------------------------------------------------------------------- /hive-batch/.gitignore: -------------------------------------------------------------------------------- 1 | /.idea/ 2 | /results.txt 3 | -------------------------------------------------------------------------------- /hive-batch/README.asciidoc: -------------------------------------------------------------------------------- 1 | == Hive batch sample 2 | 3 | Sample batch application that uses Hive to do a simple Twitter search analysis 4 | 5 | === Building and running 6 | 7 | This sample is configured to run with Apache Hadoop 2.6.0 stable release and Hive 0.13.1. 
8 | 9 | You need to have a Hive server running for this app - see the https://cwiki.apache.org/confluence/display/Hive/GettingStarted[Hive Wiki] 10 | 11 | Quick steps to start Hive server (in a separate terminal window): 12 | 13 | #(You need to have the Hadoop environment set up first) 14 | $ cd {dir where hive binary download is extracted} 15 | $ export HIVE_HOME=`pwd` 16 | $ export PATH=$HIVE_HOME/bin:$PATH 17 | $ hive --service hiveserver 18 | 19 | Use the following commands to build and run the sample 20 | 21 | $ mvn clean package 22 | $ sh ./target/appassembler/bin/hiveBatchApp 23 | 24 | You should see lots of logging messages and also some output that resembles the following: 25 | 26 | ``` 27 | RESULTS from /tweets/hiveout 28 | NBA 91 29 | CoachBillick 89 30 | espn 69 31 | SportsCenter 42 32 | TheOnion 41 33 | ESPNStatsInfo 41 34 | TMZ 39 35 | darrenrovell 37 36 | BleacherReport 37 37 | MeridianoTV 37 38 | ``` 39 | 40 | -------------------------------------------------------------------------------- /hive-batch/src/main/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # Direct log messages to stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target=System.out 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %40.40c:%4L - %m%n 6 | 7 | # Root logger option 8 | log4j.rootLogger=ERROR, stdout 9 | 10 | log4j.logger.org.springframework=INFO 11 | log4j.logger.org.apache.thrift.server=OFF 12 | log4j.logger.hive.ql.metadata.Hive=OFF 13 | log4j.logger.org.datanucleus=ERROR 14 | log4j.logger.DataNucleus=OFF 15 | 16 | -------------------------------------------------------------------------------- /hive-batch/src/main/java/org/springframework/samples/hadoop/hive/HiveBatchApp.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 
2011-2014 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.samples.hadoop.hive; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.batch.core.Job; 21 | import org.springframework.batch.core.JobParameters; 22 | import org.springframework.batch.core.JobParametersInvalidException; 23 | import org.springframework.batch.core.launch.JobLauncher; 24 | import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; 25 | import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; 26 | import org.springframework.batch.core.repository.JobRestartException; 27 | import org.springframework.context.support.AbstractApplicationContext; 28 | import org.springframework.context.support.ClassPathXmlApplicationContext; 29 | 30 | public class HiveBatchApp { 31 | 32 | private static final Log log = LogFactory.getLog(HiveBatchApp.class); 33 | 34 | public static void main(String[] args) throws JobParametersInvalidException, JobExecutionAlreadyRunningException, JobRestartException, JobInstanceAlreadyCompleteException { 35 | System.out.println("TEST"); 36 | AbstractApplicationContext context = new ClassPathXmlApplicationContext("classpath:/META-INF/spring/*-context.xml"); 37 | log.info("Batch Tweet Influencers Hive Job Running"); 38 | 
context.registerShutdownHook(); 39 | 40 | JobLauncher jobLauncher = context.getBean(JobLauncher.class); 41 | Job job = context.getBean(Job.class); 42 | jobLauncher.run(job, new JobParameters()); 43 | 44 | } 45 | } -------------------------------------------------------------------------------- /hive-batch/src/main/resources/META-INF/spring/batch-common-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /hive-batch/src/main/resources/META-INF/spring/hive-batch-context.xml: -------------------------------------------------------------------------------- 1 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | fs.defaultFS=${hd.fs} 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /hive-batch/src/main/resources/file-prep.groovy: -------------------------------------------------------------------------------- 1 | // requires three variables, localSourceFile and inputDir, outputDir 2 | // use the shell (made available under variable fsh) 3 | 4 | if (!fsh.test(inputDir)) { 5 | fsh.mkdir(inputDir); 6 | fsh.copyFromLocal(localSourceFile, inputDir); 7 | fsh.chmod(700, inputDir) 8 | } 9 | -------------------------------------------------------------------------------- /hive-batch/src/main/resources/hadoop.properties: -------------------------------------------------------------------------------- 1 | hd.fs=hdfs://localhost:8020 2 | 3 | localSourceFile=data/nbatweets-small.txt 4 | tweets.input.path=/tweets/input 5 | -------------------------------------------------------------------------------- /hive-batch/src/main/resources/hive.properties: -------------------------------------------------------------------------------- 1 | 
hive.host=localhost 2 | hive.port=10000 3 | -------------------------------------------------------------------------------- /hive-batch/src/main/resources/results.groovy: -------------------------------------------------------------------------------- 1 | //requires outputDir 2 | 3 | // use the shell (made available under variable fsh) 4 | println "RESULTS from " + outputDir 5 | old = new File('results.txt') 6 | if( old.exists() ) { 7 | old.delete() 8 | } 9 | fsh.get(outputDir + '/*', 'results.txt'); 10 | String fileContents = new File('results.txt').text 11 | println fileContents 12 | -------------------------------------------------------------------------------- /hive-batch/src/main/resources/set-hive-permissions.groovy: -------------------------------------------------------------------------------- 1 | // Setup hive directories and permissions in case they aren't there already 2 | 3 | // use the shell (made available under variable fsh) 4 | if (!fsh.test("/tmp")) { 5 | fsh.mkdir("/tmp") 6 | fsh.chmod("a+w", "/tmp") 7 | } 8 | if (!fsh.test("/user/hive/warehouse")) { 9 | fsh.mkdir("/user/hive/warehouse") 10 | fsh.chmod("a+w", "/user/hive/warehouse") 11 | } 12 | -------------------------------------------------------------------------------- /hive-batch/src/main/resources/tweet-influencers.hql: -------------------------------------------------------------------------------- 1 | create external table if not exists tweetdata (value STRING) LOCATION '/tweets/input'; 2 | 3 | insert overwrite directory '/tweets/hiveout' 4 | select r.retweetedUser, '\t', count(r.retweetedUser) as count 5 | from tweetdata j 6 | lateral view json_tuple(j.value, 'retweet', 'retweetedStatus') t as retweet, retweetedStatus 7 | lateral view json_tuple(t.retweetedStatus, 'fromUser') r as retweetedUser 8 | where t.retweet = 'true' 9 | group by r.retweetedUser order by count desc limit 10; 10 | 11 | -------------------------------------------------------------------------------- 
/hive/.gitignore: -------------------------------------------------------------------------------- 1 | /metastore_db/ 2 | /TempStatsStore/ 3 | /*.log 4 | /.settings/ 5 | /target/ 6 | /.idea/ 7 | /hive_uri_hits/ 8 | /*.iml 9 | -------------------------------------------------------------------------------- /hive/README.asciidoc: -------------------------------------------------------------------------------- 1 | == Hive samples 2 | 3 | There are three sample applications that use Hive. The examples are 4 | 5 | * A simple /etc/passwd file analysis application using HiveTemplate via the Thrift API. 6 | * A simple /etc/passwd file analysis application using HiveClient via the Thrift API. 7 | * A more complex Apache Log file analysis example using HiveRunner 8 | 9 | === Building and running 10 | 11 | This sample is configured to run with Apache Hadoop 2.6.0 stable release. 12 | 13 | Use the following commands to build and run the sample 14 | 15 | $ mvn clean package 16 | $ sh ./target/appassembler/bin/hiveApp 17 | $ sh ./target/appassembler/bin/hiveClientApp 18 | $ sh ./target/appassembler/bin/hiveAppWithApacheLogs 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /hive/data/passwd: -------------------------------------------------------------------------------- 1 | root:x:0:0:root:/root:/bin/bash 2 | bin:x:1:1:bin:/bin:/sbin/nologin 3 | daemon:x:2:2:daemon:/sbin:/sbin/nologin 4 | adm:x:3:4:adm:/var/adm:/sbin/nologin 5 | lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin 6 | sync:x:5:0:sync:/sbin:/bin/sync 7 | shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown 8 | halt:x:7:0:halt:/sbin:/sbin/halt 9 | mail:x:8:12:mail:/var/spool/mail:/sbin/nologin 10 | uucp:x:10:14:uucp:/var/spool/uucp:/sbin/nologin 11 | operator:x:11:0:operator:/root:/sbin/nologin 12 | games:x:12:100:games:/usr/games:/sbin/nologin 13 | gopher:x:13:30:gopher:/var/gopher:/sbin/nologin 14 | ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin 15 | 
nobody:x:99:99:Nobody:/:/sbin/nologin 16 | vcsa:x:69:69:virtual console memory owner:/dev:/sbin/nologin 17 | rpc:x:32:32:Rpcbind Daemon:/var/cache/rpcbind:/sbin/nologin 18 | saslauth:x:499:76:"Saslauthd user":/var/empty/saslauth:/sbin/nologin 19 | postfix:x:89:89::/var/spool/postfix:/sbin/nologin 20 | rpcuser:x:29:29:RPC Service User:/var/lib/nfs:/sbin/nologin 21 | nfsnobody:x:65534:65534:Anonymous NFS User:/var/lib/nfs:/sbin/nologin 22 | sshd:x:74:74:Privilege-separated SSH:/var/empty/sshd:/sbin/nologin 23 | vagrant:x:500:500:vagrant:/home/vagrant:/bin/bash 24 | vboxadd:x:498:1::/var/run/vboxadd:/bin/false 25 | dbus:x:81:81:System message bus:/:/sbin/nologin 26 | -------------------------------------------------------------------------------- /hive/src/main/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # Direct log messages to stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target=System.out 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %40.40c:%4L - %m%n 6 | 7 | # Root logger option 8 | log4j.rootLogger=ERROR, stdout 9 | 10 | log4j.logger.org.springframework=INFO 11 | log4j.logger.org.apache.thrift.server=OFF 12 | log4j.logger.hive.ql.metadata.Hive=OFF 13 | log4j.logger.org.datanucleus=ERROR 14 | log4j.logger.DataNucleus=OFF 15 | 16 | -------------------------------------------------------------------------------- /hive/src/main/java/org/springframework/samples/hadoop/hive/HiveApp.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2012 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.samples.hadoop.hive; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.context.support.AbstractApplicationContext; 21 | import org.springframework.context.support.ClassPathXmlApplicationContext; 22 | import org.springframework.data.hadoop.hive.HiveTemplate; 23 | 24 | public class HiveApp { 25 | 26 | private static final Log log = LogFactory.getLog(HiveApp.class); 27 | 28 | public static void main(String[] args) throws Exception { 29 | AbstractApplicationContext context = new ClassPathXmlApplicationContext( 30 | "/META-INF/spring/hive-context.xml", HiveApp.class); 31 | log.info("Hive Application Running"); 32 | context.registerShutdownHook(); 33 | 34 | HiveTemplate template = context.getBean(HiveTemplate.class); 35 | template.query("show tables;"); 36 | 37 | PasswordRepository repository = context.getBean(HiveTemplatePasswordRepository.class); 38 | repository.processPasswordFile("/user/hive/input/passwd"); 39 | log.info("Count of password entries = " + repository.count()); 40 | context.close(); 41 | log.info("Hive Application Completed"); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /hive/src/main/java/org/springframework/samples/hadoop/hive/HiveAppWithApacheLogs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2012 the original author or authors. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.samples.hadoop.hive; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.context.support.AbstractApplicationContext; 21 | import org.springframework.context.support.ClassPathXmlApplicationContext; 22 | import org.springframework.data.hadoop.hive.HiveRunner; 23 | 24 | public class HiveAppWithApacheLogs { 25 | 26 | private static final Log log = LogFactory.getLog(HiveAppWithApacheLogs.class); 27 | 28 | public static void main(String[] args) throws Exception { 29 | AbstractApplicationContext context = new ClassPathXmlApplicationContext( 30 | "/META-INF/spring/hive-apache-log-context.xml", HiveAppWithApacheLogs.class); 31 | log.info("Hive Application Running"); 32 | context.registerShutdownHook(); 33 | 34 | HiveRunner runner = context.getBean(HiveRunner.class); 35 | runner.call(); 36 | 37 | context.close(); 38 | log.info("Hive Application Completed"); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /hive/src/main/java/org/springframework/samples/hadoop/hive/HiveClientApp.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2012 the original author or authors. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.samples.hadoop.hive; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.context.support.AbstractApplicationContext; 21 | import org.springframework.context.support.ClassPathXmlApplicationContext; 22 | import org.springframework.data.hadoop.hive.HiveTemplate; 23 | 24 | public class HiveClientApp { 25 | 26 | private static final Log log = LogFactory.getLog(HiveClientApp.class); 27 | 28 | public static void main(String[] args) throws Exception { 29 | AbstractApplicationContext context = new ClassPathXmlApplicationContext( 30 | "/META-INF/spring/hive-context.xml", HiveClientApp.class); 31 | log.info("Hive Application Running"); 32 | context.registerShutdownHook(); 33 | 34 | HiveTemplate template = context.getBean(HiveTemplate.class); 35 | template.query("show tables;"); 36 | 37 | PasswordRepository repository = context.getBean(HiveClientPasswordRepository.class); 38 | repository.processPasswordFile("/user/hive/input/passwd"); 39 | log.info("Count of password entries = " + repository.count()); 40 | context.close(); 41 | log.info("Hive Application Completed"); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /hive/src/main/java/org/springframework/samples/hadoop/hive/HiveClientPasswordRepository.java: 
-------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.hive; 2 | 3 | import org.apache.commons.logging.Log; 4 | import org.apache.commons.logging.LogFactory; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.beans.factory.annotation.Value; 7 | import org.springframework.data.hadoop.hive.HiveClient; 8 | import org.springframework.data.hadoop.hive.HiveClientFactory; 9 | import org.springframework.stereotype.Repository; 10 | 11 | import java.sql.SQLException; 12 | 13 | @Repository 14 | public class HiveClientPasswordRepository implements PasswordRepository { 15 | 16 | private static final Log logger = LogFactory 17 | .getLog(HiveClientPasswordRepository.class); 18 | 19 | private HiveClientFactory hiveClientFactory; 20 | 21 | private @Value("${hive.table}") 22 | String tableName; 23 | 24 | @Autowired 25 | public HiveClientPasswordRepository(HiveClientFactory hiveClientFactory) { 26 | this.hiveClientFactory = hiveClientFactory; 27 | } 28 | 29 | @Override 30 | public Long count() { 31 | HiveClient hiveClient = createHiveClient(); 32 | try { 33 | return Long.parseLong(hiveClient.executeAndfetchOne("select count(*) from " + tableName)); 34 | } finally { 35 | try { 36 | hiveClient.shutdown(); 37 | } catch (SQLException tex) { 38 | logger.debug( 39 | "Unexpected exception on shutting down HiveClient", tex); 40 | } 41 | } 42 | } 43 | 44 | @Override 45 | public void processPasswordFile(String inputFile) { 46 | //TODO 47 | } 48 | 49 | protected HiveClient createHiveClient() { 50 | return hiveClientFactory.getHiveClient(); 51 | } 52 | 53 | private RuntimeException translateException(Exception ex) { 54 | return new RuntimeException(ex); 55 | } 56 | 57 | 58 | } 59 | -------------------------------------------------------------------------------- /hive/src/main/java/org/springframework/samples/hadoop/hive/HiveTemplatePasswordRepository.java: 
-------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.hive; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import org.springframework.beans.factory.annotation.Autowired; 7 | import org.springframework.beans.factory.annotation.Value; 8 | import org.springframework.data.hadoop.hive.HiveOperations; 9 | import org.springframework.stereotype.Repository; 10 | 11 | @Repository 12 | public class HiveTemplatePasswordRepository implements PasswordRepository { 13 | 14 | private @Value("${hive.table}") String tableName; 15 | 16 | private HiveOperations hiveOperations; 17 | 18 | @Autowired 19 | public HiveTemplatePasswordRepository(HiveOperations hiveOperations) { 20 | this.hiveOperations = hiveOperations; 21 | } 22 | 23 | @Override 24 | public Long count() { 25 | return hiveOperations.queryForLong("select count(*) from " + tableName); 26 | } 27 | 28 | @Override 29 | public void processPasswordFile(String inputFile) { 30 | Map parameters = new HashMap(); 31 | parameters.put("inputFile", inputFile); 32 | hiveOperations.query("classpath:password-analysis.hql", parameters); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /hive/src/main/java/org/springframework/samples/hadoop/hive/PasswordRepository.java: -------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.hive; 2 | 3 | 4 | public interface PasswordRepository { 5 | 6 | Long count(); 7 | 8 | void processPasswordFile(String inputFile); 9 | 10 | } -------------------------------------------------------------------------------- /hive/src/main/resources/.gitignore: -------------------------------------------------------------------------------- 1 | /derby.log 2 | /metastore_db/ 3 | /TempStatsStore/ 4 | -------------------------------------------------------------------------------- 
/hive/src/main/resources/META-INF/spring/hive-apache-log-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 11 | 12 | 13 | fs.defaultFS=${hd.fs} 14 | yarn.resourcemanager.address=${hd.rm} 15 | mapreduce.framework.name=yarn 16 | mapreduce.jobhistory.address=${hd.jh} 17 | 18 | 19 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /hive/src/main/resources/META-INF/spring/hive-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | fs.defaultFS=${hd.fs} 16 | yarn.resourcemanager.address=${hd.rm} 17 | mapreduce.framework.name=yarn 18 | mapreduce.jobhistory.address=${hd.jh} 19 | 20 | 21 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 41 | 31 | 32 | 35 | 36 | -------------------------------------------------------------------------------- /mapreduce/src/main/resources/copy-files.groovy: -------------------------------------------------------------------------------- 1 | //requires three variables, localSourceFile and inputDir, outputDir 2 | 3 | // use the shell (made available under variable fsh) 4 | 5 | if (!fsh.test(inputDir)) { 6 | fsh.mkdir(inputDir); 7 | fsh.copyFromLocal(localSourceFile, inputDir); 8 | fsh.chmod(700, inputDir) 9 | } 10 | if (fsh.test(outputDir)) { 11 | fsh.rmr(outputDir) 12 | } -------------------------------------------------------------------------------- /mapreduce/src/main/resources/hadoop.properties: -------------------------------------------------------------------------------- 1 | hd.fs=hdfs://localhost:8020 2 | hd.rm=localhost:8032 3 | hd.jh=localhost:10020 4 | 5 | wordcount.input.path=/user/gutenberg/input/word/ 6 | wordcount.output.path=/user/gutenberg/output/word/ 7 | localSourceFile=data/nietzsche-chapter-1.txt 8 | 
-------------------------------------------------------------------------------- /mr-batch/.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | -------------------------------------------------------------------------------- /mr-batch/README.asciidoc: -------------------------------------------------------------------------------- 1 | == HDFS and MapReduce batch sample 2 | 3 | In this sample we will execute HDFS operations and a MapReduce job using Spring Batch. The MapReduce job counts the number of times a hashtag exists in a file containing tweets. The HDFS operations are to first copy a data files into HDFS and then to remove any existing files in the MapReduce job's output directory. After the MapReduce job completes the output is copied to the local files system so we can print the output to the console. 4 | 5 | === Building and running 6 | 7 | Use the following commands to build and run the sample 8 | 9 | $ mvn clean package 10 | $ sh ./target/appassembler/bin/mrBatchApp 11 | 12 | This example is configured to build and run against Hadoop 2.6.0. 
13 | -------------------------------------------------------------------------------- /mr-batch/src/main/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # Direct log messages to stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target=System.out 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %40.40c:%4L - %m%n 6 | 7 | # Root logger option 8 | log4j.rootLogger=ERROR, stdout 9 | 10 | log4j.logger.org.springframework=INFO 11 | log4j.logger.org.apache.thrift.server=OFF 12 | log4j.logger.hive.ql.metadata.Hive=OFF 13 | log4j.logger.org.datanucleus=ERROR 14 | log4j.logger.DataNucleus=OFF 15 | 16 | -------------------------------------------------------------------------------- /mr-batch/src/main/java/org/springframework/samples/hadoop/mapreduce/MrBatchApp.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2014 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.springframework.samples.hadoop.mapreduce; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.batch.core.Job; 21 | import org.springframework.batch.core.JobParameters; 22 | import org.springframework.batch.core.JobParametersInvalidException; 23 | import org.springframework.batch.core.launch.JobLauncher; 24 | import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; 25 | import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; 26 | import org.springframework.batch.core.repository.JobRestartException; 27 | import org.springframework.context.support.AbstractApplicationContext; 28 | import org.springframework.context.support.ClassPathXmlApplicationContext; 29 | 30 | public class MrBatchApp { 31 | 32 | private static final Log log = LogFactory.getLog(MrBatchApp.class); 33 | 34 | public static void main(String[] args) throws JobParametersInvalidException, JobExecutionAlreadyRunningException, JobRestartException, JobInstanceAlreadyCompleteException { 35 | System.out.println("TEST"); 36 | AbstractApplicationContext context = new ClassPathXmlApplicationContext("classpath:/META-INF/spring/*-context.xml"); 37 | log.info("Batch Tweet Hashtag MR Job Running"); 38 | context.registerShutdownHook(); 39 | 40 | JobLauncher jobLauncher = context.getBean(JobLauncher.class); 41 | Job job = context.getBean(Job.class); 42 | jobLauncher.run(job, new JobParameters()); 43 | 44 | } 45 | } -------------------------------------------------------------------------------- /mr-batch/src/main/resources/META-INF/spring/batch-common-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /mr-batch/src/main/resources/file-prep.groovy: 
-------------------------------------------------------------------------------- 1 | // requires three variables, localSourceFile and inputDir, outputDir 2 | // use the shell (made available under variable fsh) 3 | 4 | if (!fsh.test(inputDir)) { 5 | fsh.mkdir(inputDir); 6 | fsh.copyFromLocal(localSourceFile, inputDir); 7 | fsh.chmod(700, inputDir) 8 | } 9 | 10 | if (fsh.test(outputDir)) { 11 | fsh.rmr(outputDir) 12 | } 13 | -------------------------------------------------------------------------------- /mr-batch/src/main/resources/hadoop.properties: -------------------------------------------------------------------------------- 1 | hd.fs=hdfs://localhost:8020 2 | 3 | localSourceFile=data/nbatweets-small.txt 4 | tweets.input.path=/tweets/input 5 | tweets.output.path=/tweets/output 6 | -------------------------------------------------------------------------------- /mr-batch/src/main/resources/results.groovy: -------------------------------------------------------------------------------- 1 | //requires outputDir 2 | 3 | // use the shell (made available under variable fsh) 4 | println "RESULTS from " + outputDir 5 | old = new File('results.txt') 6 | if( old.exists() ) { 7 | old.delete() 8 | } 9 | fsh.get(outputDir + '/part-r-*', 'results.txt'); 10 | String fileContents = new File('results.txt').text 11 | println fileContents 12 | -------------------------------------------------------------------------------- /pig/.gitignore: -------------------------------------------------------------------------------- 1 | /.settings/ 2 | /.idea/ 3 | /target/ 4 | /*.iml 5 | -------------------------------------------------------------------------------- /pig/README.asciidoc: -------------------------------------------------------------------------------- 1 | == Pig samples 2 | 3 | There are three sample applications that use Pig, each sample getting more sophisticated in its use of Pig. 
The examples are 4 | 5 | * A simple /etc/passwd file analysis application using PigRunner 6 | * Using PigTemplate to execute Pig scripts 7 | * A more complex Apache Log file analysis example using PigRunner 8 | 9 | === Building and running 10 | 11 | Use the following commands to build and run the sample 12 | 13 | $ mvn clean package 14 | $ sh ./target/appassembler/bin/pigApp 15 | $ sh ./target/appassembler/bin/pigAppWithRepository 16 | $ sh ./target/appassembler/bin/pigAppWithApacheLogs 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /pig/lib/piggybank-0.9.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spring-attic/spring-hadoop-samples/18e6641b2ae6830753abc9b0e7a3e45ade814e0e/pig/lib/piggybank-0.9.2.jar -------------------------------------------------------------------------------- /pig/src/main/config/log4j.properties: -------------------------------------------------------------------------------- 1 | # Direct log messages to stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target=System.out 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %40.40c:%4L - %m%n 6 | 7 | # Root logger option 8 | log4j.rootLogger=INFO, stdout 9 | 10 | #log4j.logger.org.springframework.data.hadoop=DEBUG 11 | -------------------------------------------------------------------------------- /pig/src/main/java/org/springframework/samples/hadoop/pig/PasswordRepository.java: -------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.pig; 2 | 3 | import java.util.Collection; 4 | 5 | public interface PasswordRepository { 6 | 7 | public abstract void processPasswordFile(String inputFile); 8 | 9 | public abstract void processPasswordFiles(Collection inputFiles); 10 | 11 | 
} -------------------------------------------------------------------------------- /pig/src/main/java/org/springframework/samples/hadoop/pig/PasswordService.java: -------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.pig; 2 | 3 | import org.springframework.integration.annotation.Header; 4 | import org.springframework.integration.annotation.ServiceActivator; 5 | 6 | public class PasswordService { 7 | 8 | private PasswordRepository passwordRepository; 9 | 10 | public PasswordService(PasswordRepository passwordRepository) { 11 | this.passwordRepository = passwordRepository; 12 | } 13 | 14 | @ServiceActivator 15 | public void process(@Header("hdfs_path") String inputDir) { 16 | passwordRepository.processPasswordFile(inputDir); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /pig/src/main/java/org/springframework/samples/hadoop/pig/PigApp.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2012 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.springframework.samples.hadoop.pig; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.context.support.AbstractApplicationContext; 21 | import org.springframework.context.support.ClassPathXmlApplicationContext; 22 | 23 | public class PigApp { 24 | 25 | private static final Log log = LogFactory.getLog(PigApp.class); 26 | 27 | public static void main(String[] args) throws Exception { 28 | AbstractApplicationContext context = new ClassPathXmlApplicationContext( 29 | "/META-INF/spring/pig-context.xml", PigApp.class); 30 | log.info("Pig Application Running"); 31 | context.registerShutdownHook(); 32 | 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /pig/src/main/java/org/springframework/samples/hadoop/pig/PigAppWithApacheLogs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2012 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.springframework.samples.hadoop.pig; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.context.support.AbstractApplicationContext; 21 | import org.springframework.context.support.ClassPathXmlApplicationContext; 22 | 23 | public class PigAppWithApacheLogs { 24 | 25 | private static final Log log = LogFactory.getLog(PigAppWithApacheLogs.class); 26 | 27 | public static void main(String[] args) throws Exception { 28 | AbstractApplicationContext context = new ClassPathXmlApplicationContext( 29 | "/META-INF/spring/pig-context-apache-logs.xml", PigAppWithApacheLogs.class); 30 | log.info("Pig Application Running"); 31 | context.registerShutdownHook(); 32 | 33 | 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /pig/src/main/java/org/springframework/samples/hadoop/pig/PigAppWithRepository.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011-2012 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.springframework.samples.hadoop.pig; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.context.support.AbstractApplicationContext; 21 | import org.springframework.context.support.ClassPathXmlApplicationContext; 22 | import org.springframework.data.hadoop.fs.FsShell; 23 | 24 | public class PigAppWithRepository { 25 | 26 | private static final Log log = LogFactory.getLog(PigAppWithRepository.class); 27 | 28 | public static void main(String[] args) throws Exception { 29 | AbstractApplicationContext context = new ClassPathXmlApplicationContext( 30 | "/META-INF/spring/pig-context-password-repository.xml", PigAppWithRepository.class); 31 | log.info("Pig Application Running"); 32 | context.registerShutdownHook(); 33 | 34 | String outputDir = "/data/password-repo/output"; 35 | FsShell fsShell = context.getBean(FsShell.class); 36 | if (fsShell.test(outputDir)) { 37 | fsShell.rmr(outputDir); 38 | } 39 | 40 | PasswordRepository repo = context.getBean(PigPasswordRepository.class); 41 | repo.processPasswordFile("/data/passwd/input"); 42 | 43 | /* 44 | Collection files = new ArrayList(); 45 | files.add("/data/passwd/input"); 46 | files.add("/data/passwd/input2"); 47 | repo.processPasswordFiles(files); 48 | */ 49 | 50 | 51 | 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /pig/src/main/java/org/springframework/samples/hadoop/pig/PigPasswordRepository.java: -------------------------------------------------------------------------------- 1 | package org.springframework.samples.hadoop.pig; 2 | 3 | import java.util.Collection; 4 | import java.util.Properties; 5 | 6 | import org.springframework.data.hadoop.pig.PigOperations; 7 | import org.springframework.data.hadoop.util.PathUtils; 8 | import org.springframework.util.Assert; 9 | 10 | public class PigPasswordRepository implements PasswordRepository { 11 | 12 | private 
PigOperations pigOperations; 13 | 14 | private String pigScript = "classpath:password-analysis.pig"; 15 | 16 | public PigPasswordRepository(PigOperations pigOperations) { 17 | Assert.notNull(pigOperations); 18 | this.pigOperations = pigOperations; 19 | } 20 | 21 | public void setPigScript(String pigScript) { 22 | this.pigScript = pigScript; 23 | } 24 | 25 | @Override 26 | public void processPasswordFile(String inputFile) { 27 | Assert.notNull(inputFile); 28 | String outputDir = 29 | PathUtils.format("/data/password-repo/output/%1$tY/%1$tm/%1$td/%1$tH/%1$tM/%1$tS"); 30 | Properties scriptParameters = new Properties(); 31 | scriptParameters.put("inputDir", inputFile); 32 | scriptParameters.put("outputDir", outputDir); 33 | pigOperations.executeScript(pigScript, scriptParameters); 34 | } 35 | 36 | @Override 37 | public void processPasswordFiles(Collection inputFiles) { 38 | for (String inputFile : inputFiles) { 39 | processPasswordFile(inputFile); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /pig/src/main/resources/META-INF/spring/pig-context-apache-logs.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 11 | 12 | 13 | fs.defaultFS=${hd.fs} 14 | yarn.resourcemanager.address=${hd.rm} 15 | mapreduce.framework.name=yarn 16 | mapreduce.jobhistory.address=${hd.jh} 17 | 18 | 19 | 20 | 21 | 26 | 27 | 30 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /pig/src/main/resources/META-INF/spring/pig-context-password-repository.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 11 | 12 | 13 | fs.defaultFS=${hd.fs} 14 | yarn.resourcemanager.address=${hd.rm} 15 | mapreduce.framework.name=yarn 16 | mapreduce.jobhistory.address=${hd.jh} 17 | 18 | 19 | 20 | 21 | 22 | 23 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 
-------------------------------------------------------------------------------- /pig/src/main/resources/META-INF/spring/pig-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 11 | 12 | 13 | fs.defaultFS=${hd.fs} 14 | yarn.resourcemanager.address=${hd.rm} 15 | mapreduce.framework.name=yarn 16 | mapreduce.jobhistory.address=${hd.jh} 17 | 18 | 19 | 20 | 21 | 26 | 27 | 30 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /pig/src/main/resources/apache-log-simple.pig: -------------------------------------------------------------------------------- 1 | REGISTER $piggybanklib; 2 | DEFINE LogLoader org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader(); 3 | logs = LOAD '$inputPath' USING LogLoader as (remoteHost, remoteLogname, user, time, method, uri, proto, status, bytes, referer, userAgent); 4 | -- logs = FILTER logs BY method == 'GET' AND status == 200; 5 | -- logs = FOREACH logs GENERATE uri; 6 | -- basic dump of URI matching the criteria above 7 | -- DUMP logs; 8 | 9 | -- determine popular URLs (for caching purposes for example) 10 | byUri = ORDER logs BY uri; 11 | byUri = GROUP logs BY uri; 12 | 13 | uriHits = FOREACH byUri GENERATE group AS uri, COUNT(logs.uri) AS numHits; 14 | -- or store into into a file 15 | STORE uriHits into '$outputPath/pig_uri_hits'; 16 | -------------------------------------------------------------------------------- /pig/src/main/resources/apache-log.pig: -------------------------------------------------------------------------------- 1 | REGISTER $piggybanklib; 2 | DEFINE LogLoader org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader(); 3 | logs = LOAD '$inputPath' USING LogLoader as (remoteHost, remoteLogname, user, time, method, uri, proto, status, bytes, referer, userAgent); 4 | -- logs = FILTER logs BY method == 'GET' AND status == 200; 5 | -- logs = FOREACH logs GENERATE uri; 6 | -- basic dump of URI 
matching the criteria above 7 | -- DUMP logs; 8 | 9 | -- determine popular URLs (for caching purposes for example) 10 | byUri = ORDER logs BY uri; 11 | byUri = GROUP logs BY uri; 12 | 13 | uriHits = FOREACH byUri GENERATE group AS uri, COUNT(logs.uri) AS numHits; 14 | -- or store into into a file 15 | STORE uriHits into 'pig_uri_hits'; 16 | 17 | byCount = GROUP uriHits BY numHits; 18 | histogram = FOREACH byCount GENERATE group AS numHits, COUNT(uriHits) AS numUris; 19 | 20 | -- isolate singular hits 21 | lowHits = FILTER histogram BY numHits == 1; 22 | lowHits = FOREACH lowHits GENERATE numUris AS num; 23 | STORE lowHits INTO 'pig_low_hits'; 24 | 25 | highHits = FILTER histogram BY numHits > 1; 26 | highHitsNumber = FOREACH highHits GENERATE (numHits * numUris) AS num; 27 | highHitsNumber = GROUP highHitsNumber ALL; 28 | highHitsNumber = FOREACH highHitsNumber GENERATE SUM(highHitsNumber.num); 29 | STORE highHitsNumber INTO 'pig_high_hits'; 30 | -- URIs that benefit from caching 31 | uriToCache = GROUP highHits ALL; 32 | uriToCache = FOREACH uriToCache GENERATE SUM(highHits.numUris); 33 | 34 | STORE uriToCache INTO 'pig_uris_to_cache'; 35 | -------------------------------------------------------------------------------- /pig/src/main/resources/copy-files.groovy: -------------------------------------------------------------------------------- 1 | //requires three variables, localSourceFile and inputDir, outputDir 2 | 3 | // use the shell (made available under variable fsh) 4 | 5 | if (!fsh.test(inputDir)) { 6 | fsh.mkdir(inputDir); 7 | fsh.copyFromLocal(localSourceFile, inputDir); 8 | fsh.chmod(700, inputDir) 9 | } 10 | if (fsh.test(outputDir)) { 11 | fsh.rmr(outputDir) 12 | } -------------------------------------------------------------------------------- /pig/src/main/resources/core-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- 
/pig/src/main/resources/hadoop.properties: -------------------------------------------------------------------------------- 1 | hd.fs=hdfs://localhost:8020 2 | hd.rm=localhost:8032 3 | hd.jh=localhost:10020 4 | 5 | localSourceFile=/etc/passwd 6 | inputDir=/data/passwd/input 7 | outputDir=/data/passwd/output 8 | -------------------------------------------------------------------------------- /pig/src/main/resources/password-analysis.pig: -------------------------------------------------------------------------------- 1 | passwd = LOAD '$inputDir' USING PigStorage(':') AS (user:chararray, passwd:chararray, uid:int, gid:int, userinfo:chararray, home:chararray, shell:chararray); 2 | grp_shell = GROUP passwd BY shell; 3 | counts = FOREACH grp_shell GENERATE group, COUNT(passwd); 4 | STORE counts into '$outputDir'; 5 | 6 | 7 | -------------------------------------------------------------------------------- /pig/src/main/resources/pig-analysis.properties: -------------------------------------------------------------------------------- 1 | pig.piggybanklib=${basedir}/lib/piggybank-0.9.2.jar 2 | pig.localSourceFile=${basedir}/data/apache.log 3 | pig.inputPath=/data/input 4 | pig.outputPath=/data/output 5 | -------------------------------------------------------------------------------- /pig/src/main/resources/pig-server.properties: -------------------------------------------------------------------------------- 1 | pig.cachedbag.memusage=0.1 2 | pig.skewedjoin.reduce.memusage=0.3 3 | -------------------------------------------------------------------------------- /yarn/gradle.properties: -------------------------------------------------------------------------------- 1 | version = 2.0.0.BUILD-SNAPSHOT 2 | hadoop26SpringDataVersion = 2.2.0.RELEASE 3 | junitVersion = 4.8.2 4 | log4jVersion = 1.2.16 5 | slf4jVersion = 1.6.4 6 | hamcrestVersion = 1.2.1 7 | groovyVersion = 1.8.5 8 | -------------------------------------------------------------------------------- 
/yarn/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spring-attic/spring-hadoop-samples/18e6641b2ae6830753abc9b0e7a3e45ade814e0e/yarn/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /yarn/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Thu Jul 30 16:33:21 EDT 2015 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=http\://services.gradle.org/distributions/gradle-2.2.1-bin.zip 7 | -------------------------------------------------------------------------------- /yarn/gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 12 | set DEFAULT_JVM_OPTS= 13 | 14 | set DIRNAME=%~dp0 15 | if "%DIRNAME%" == "" set DIRNAME=. 16 | set APP_BASE_NAME=%~n0 17 | set APP_HOME=%DIRNAME% 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 
31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windowz variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | if "%@eval[2+2]" == "4" goto 4NT_args 53 | 54 | :win9xME_args 55 | @rem Slurp the command line arguments. 56 | set CMD_LINE_ARGS= 57 | set _SKIP=2 58 | 59 | :win9xME_args_slurp 60 | if "x%~1" == "x" goto execute 61 | 62 | set CMD_LINE_ARGS=%* 63 | goto execute 64 | 65 | :4NT_args 66 | @rem Get arguments from the 4NT Shell from JP Software 67 | set CMD_LINE_ARGS=%$ 68 | 69 | :execute 70 | @rem Setup the command line 71 | 72 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if "%ERRORLEVEL%"=="0" goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 
84 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 85 | exit /b 1 86 | 87 | :mainEnd 88 | if "%OS%"=="Windows_NT" endlocal 89 | 90 | :omega 91 | -------------------------------------------------------------------------------- /yarn/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'spring-yarn-examples' 2 | 3 | include ':yarn:simple-command',':yarn:list-applications',':yarn:multi-context',':yarn:kill-application',':yarn:batch-partition',':yarn:batch-files',':yarn:custom-amservice',':yarn:batch-amjob',':yarn:restart-context' 4 | 5 | rootProject.children.find { 6 | if (it.name == 'yarn') { 7 | it.name = 'yarn-examples-common' 8 | it.children.each { 9 | it.name = 'yarn-examples-' + it.name 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /yarn/yarn/README.md: -------------------------------------------------------------------------------- 1 | Spring Yarn Examples Common 2 | =========================== 3 | 4 | This project contains a shared code for all Spring Yarn examples. 5 | -------------------------------------------------------------------------------- /yarn/yarn/batch-amjob/README.md: -------------------------------------------------------------------------------- 1 | Spring Yarn Batch Application Master Job Example 2 | ================================================ 3 | 4 | This example demonstrates the use of Spring Yarn functionality to run 5 | Spring Batch jobs on an Application Master without starting 6 | any Containers. 
7 | 8 | To test this example: 9 | 10 | # gradlew clean :yarn-examples-common:yarn-examples-batch-amjob:build 11 | 12 | To run this example against local Hadoop cluster: 13 | 14 | # gradlew -q run-yarn-examples-batch-amjob 15 | # gradlew -q run-yarn-examples-batch-amjob -DjobName=job2 16 | 17 | To run this example against remote Hadoop cluster: 18 | 19 | # gradlew -q run-yarn-examples-batch-amjob -Dhd.fs=hdfs://192.168.223.170:8020 -Dhd.rm=192.168.223.170:8032 -Dlocalresources.remote=hdfs://192.168.223.170:8020 20 | 21 | # Details 22 | 23 | This is a simplistic example using Spring Batch without adding any 24 | complex configuration logic for partitioned steps on a Yarn cluster. 25 | To verify that job was executed, see that the *Appmaster.stdout* 26 | file contains log message "INFO [PrintTasklet] - execute: Hello1". 27 | 28 | This quick command set assumes local hadoop cluster with default settings. 29 | 30 | # gradlew :yarn-examples-common:yarn-examples-batch-amjob:build -x test 31 | # hdfs dfs -rm -R /app /lib 32 | # hdfs dfs -mkdir -p /app/batch-amjob /lib 33 | # hdfs dfs -copyFromLocal yarn/batch-amjob/build/libs/* /app/batch-amjob 34 | # hdfs dfs -copyFromLocal yarn/batch-amjob/build/dependency-libs/* /lib 35 | # gradlew -q run-yarn-examples-batch-amjob 36 | -------------------------------------------------------------------------------- /yarn/yarn/batch-amjob/src/main/java/org/springframework/yarn/examples/PrintTasklet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.batch.core.StepContribution; 21 | import org.springframework.batch.core.scope.context.ChunkContext; 22 | import org.springframework.batch.core.step.tasklet.Tasklet; 23 | import org.springframework.batch.repeat.RepeatStatus; 24 | 25 | /** 26 | * Example tasklet. 27 | * 28 | * @author Janne Valkealahti 29 | * 30 | */ 31 | public class PrintTasklet implements Tasklet { 32 | 33 | private static final Log log = LogFactory.getLog(PrintTasklet.class); 34 | 35 | private String message; 36 | 37 | public void setMessage(String message) { 38 | this.message = message; 39 | } 40 | 41 | @Override 42 | public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { 43 | log.info("execute: " + message); 44 | return null; 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /yarn/yarn/batch-amjob/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | job1 14 | 15 | 16 | 17 | 18 | 19 | fs.defaultFS=${hd.fs} 20 | yarn.resourcemanager.address=${hd.rm} 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | jobName=${jobName} 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- 
/yarn/yarn/batch-amjob/src/test/resources/org/springframework/yarn/examples/BatchAmjobTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | job1 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | jobName=${jobName} 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /yarn/yarn/batch-files/src/main/java/org/springframework/yarn/examples/LoggingItemWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import java.util.List; 19 | 20 | import org.apache.commons.logging.Log; 21 | import org.apache.commons.logging.LogFactory; 22 | import org.springframework.batch.item.ItemWriter; 23 | 24 | /** 25 | * Simple writer which just logs items. 
26 | * 27 | * @author Janne Valkealahti 28 | * 29 | */ 30 | public class LoggingItemWriter implements ItemWriter { 31 | 32 | private final static Log log = LogFactory.getLog(LoggingItemWriter.class); 33 | 34 | @Override 35 | public void write(List items) throws Exception { 36 | for (String item : items) { 37 | log.info("writing: " + item); 38 | } 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /yarn/yarn/batch-files/src/main/java/org/springframework/yarn/examples/PrintTasklet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.batch.core.ExitStatus; 21 | import org.springframework.batch.core.StepContribution; 22 | import org.springframework.batch.core.scope.context.ChunkContext; 23 | import org.springframework.batch.core.step.tasklet.Tasklet; 24 | import org.springframework.batch.repeat.RepeatStatus; 25 | 26 | /** 27 | * Example tasklet. 
28 | * 29 | * @author Janne Valkealahti 30 | * 31 | */ 32 | public class PrintTasklet implements Tasklet { 33 | 34 | private static final Log log = LogFactory.getLog(PrintTasklet.class); 35 | 36 | private String message; 37 | 38 | public void setMessage(String message) { 39 | this.message = message; 40 | } 41 | 42 | public ExitStatus execute() throws Exception { 43 | log.info("execute1: " + message); 44 | System.out.print(message); 45 | return ExitStatus.COMPLETED; 46 | } 47 | 48 | @Override 49 | public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { 50 | log.info("execute2: " + message); 51 | return null; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /yarn/yarn/batch-files/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | fs.defaultFS=${hd.fs} 16 | yarn.resourcemanager.address=${hd.rm} 17 | fs.hdfs.impl=org.apache.hadoop.hdfs.DistributedFileSystem 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 29 | 30 | 31 | 32 | 33 | 34 | ${files:/syarn-tmp/batch-files/set1/*} 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /yarn/yarn/batch-files/src/test/resources/org/springframework/yarn/examples/BatchFilesTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 25 | 26 | 27 | 28 | 29 | 30 | ${files:/syarn-tmp/batch-files/set1/*} 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /yarn/yarn/batch-partition/src/main/java/org/springframework/yarn/examples/PrintTasklet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.batch.core.ExitStatus; 21 | import org.springframework.batch.core.StepContribution; 22 | import org.springframework.batch.core.scope.context.ChunkContext; 23 | import org.springframework.batch.core.step.tasklet.Tasklet; 24 | import org.springframework.batch.repeat.RepeatStatus; 25 | 26 | /** 27 | * Example tasklet. 
28 | * 29 | * @author Janne Valkealahti 30 | * 31 | */ 32 | public class PrintTasklet implements Tasklet { 33 | 34 | private static final Log log = LogFactory.getLog(PrintTasklet.class); 35 | 36 | private String message; 37 | 38 | public void setMessage(String message) { 39 | this.message = message; 40 | } 41 | 42 | public ExitStatus execute() throws Exception { 43 | log.info("execute1: " + message); 44 | System.out.print(message); 45 | return ExitStatus.COMPLETED; 46 | } 47 | 48 | @Override 49 | public RepeatStatus execute(StepContribution contribution, ChunkContext chunkContext) throws Exception { 50 | log.info("execute2: " + message); 51 | return null; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /yarn/yarn/batch-partition/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | fs.defaultFS=${hd.fs} 15 | yarn.resourcemanager.address=${hd.rm} 16 | fs.hdfs.impl=org.apache.hadoop.hdfs.DistributedFileSystem 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /yarn/yarn/batch-partition/src/test/resources/org/springframework/yarn/examples/BatchPartitionTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /yarn/yarn/build.gradle: -------------------------------------------------------------------------------- 1 | description = 'Spring Yarn Examples Common' 2 | 3 | project('yarn-examples-simple-command') { 4 | description = 'Spring Yarn with Simple Command Example' 5 | } 6 | 7 | project('yarn-examples-list-applications') { 8 | description = 'Spring Yarn with List Applications 
Example' 9 | ext.main = 'Main' 10 | } 11 | 12 | project('yarn-examples-multi-context') { 13 | description = 'Spring Yarn with Multi Context Example' 14 | } 15 | 16 | project('yarn-examples-kill-application') { 17 | description = 'Spring Yarn with Kill Application Example' 18 | ext.main = 'Main' 19 | } 20 | 21 | project('yarn-examples-batch-partition') { 22 | description = 'Spring Yarn with Batch Partition Example' 23 | } 24 | 25 | project('yarn-examples-batch-files') { 26 | description = 'Spring Yarn with Batch Files Example' 27 | } 28 | 29 | project('yarn-examples-batch-amjob') { 30 | description = 'Spring Yarn Batch Application Master Job Example' 31 | } 32 | 33 | project('yarn-examples-custom-amservice') { 34 | description = 'Spring Yarn with Custom Application Master Service Example' 35 | } 36 | 37 | project('yarn-examples-restart-context') { 38 | description = 'Spring Yarn with Restart Context Example' 39 | } 40 | -------------------------------------------------------------------------------- /yarn/yarn/custom-amservice/README.md: -------------------------------------------------------------------------------- 1 | Spring Yarn Custom Application Master Service Example 2 | ===================================================== 3 | 4 | This example demonstrates the use of Spring Yarn functionality to create 5 | a custom application master service utilised by containers. 
6 | 7 | To test this example: 8 | 9 | # gradlew clean :yarn-examples-common:yarn-examples-custom-amservice:build 10 | 11 | To run this example against local Hadoop cluster: 12 | 13 | # gradlew -q run-yarn-examples-custom-amservice 14 | 15 | Adding parameters for job count and average count of containers 16 | 17 | # gradlew -q run-yarn-examples-custom-amservice -Dca.jb=10 -Dca.cc=2 18 | 19 | To run this example against remote Hadoop cluster: 20 | 21 | # gradlew -q run-yarn-examples-custom-amservice -Dhd.fs=hdfs://192.168.223.170:8020 -Dhd.rm=192.168.223.170:8032 -Dlocalresources.remote=hdfs://192.168.223.170:8020 22 | 23 | # Details 24 | 25 | Majority of other examples are just launching containers and possibly passing some extra information 26 | either using environment variables or command line parameters. This is perfectly suiteable if task or 27 | job container is responsible is known prior the container launch operation. 28 | 29 | This example is using customised container, application master and application master service order to 30 | run simple dummy jobs. Application master is setup to execute a number of jobs on number of containers. 31 | Communication between application master and container is done via customised application master service. 32 | Containers are homing back to application master for instruction which can either be job run requests, 33 | requests to wait or requests to die. Container also tries to simulate error conditions by just randomly 34 | exiting itself. 35 | 36 | # Quick Instructions 37 | 38 | This quick command set assumes local hadoop cluster with default settings. 
39 | 40 | # gradlew :yarn-examples-common:yarn-examples-custom-amservice:build -x test 41 | # hdfs dfs -rm -R /app /lib 42 | # hdfs dfs -mkdir -p /app/custom-amservice /lib 43 | # hdfs dfs -copyFromLocal yarn/custom-amservice/build/libs/* /app/custom-amservice 44 | # hdfs dfs -copyFromLocal yarn/custom-amservice/build/dependency-libs/* /lib 45 | # gradlew -q run-yarn-examples-custom-amservice 46 | -------------------------------------------------------------------------------- /yarn/yarn/custom-amservice/src/main/java/org/springframework/yarn/examples/JobRequest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.springframework.yarn.integration.ip.mind.binding.BaseObject; 19 | 20 | /** 21 | * Simple request to exchange information between 22 | * container and amservice. 
23 | * 24 | * @author Janne Valkealahti 25 | * 26 | */ 27 | public class JobRequest extends BaseObject { 28 | 29 | public State state; 30 | public Long job; 31 | 32 | public JobRequest() { 33 | } 34 | 35 | public JobRequest(State state, Long job) { 36 | super(); 37 | this.state = state; 38 | this.job = job; 39 | } 40 | 41 | public State getState() { 42 | return state; 43 | } 44 | 45 | public void setState(State state) { 46 | this.state = state; 47 | } 48 | 49 | public Long getJob() { 50 | return job; 51 | } 52 | 53 | public void setJob(Long job) { 54 | this.job = job; 55 | } 56 | 57 | public enum State { 58 | WHATTODO, 59 | JOBDONE, 60 | JOBFAILED 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /yarn/yarn/custom-amservice/src/main/java/org/springframework/yarn/examples/JobResponse.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.springframework.yarn.integration.ip.mind.binding.BaseResponseObject; 19 | 20 | /** 21 | * Simple response to exchange information between 22 | * container and amservice. 
23 | * 24 | * @author Janne Valkealahti 25 | * 26 | */ 27 | public class JobResponse extends BaseResponseObject { 28 | 29 | public State state; 30 | public Long job; 31 | 32 | public JobResponse() { 33 | } 34 | 35 | public JobResponse(State state, Long job) { 36 | super(); 37 | this.state = state; 38 | this.job = job; 39 | } 40 | 41 | public State getState() { 42 | return state; 43 | } 44 | 45 | public void setState(State state) { 46 | this.state = state; 47 | } 48 | 49 | public Long getJob() { 50 | return job; 51 | } 52 | 53 | public void setJob(Long job) { 54 | this.job = job; 55 | } 56 | 57 | public enum State { 58 | DIE, 59 | STANDBY, 60 | RUNJOB 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /yarn/yarn/custom-amservice/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | fs.defaultFS=${hd.fs} 16 | yarn.resourcemanager.address=${hd.rm} 17 | fs.hdfs.impl=org.apache.hadoop.hdfs.DistributedFileSystem 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | ./* 29 | 30 | 31 | 32 | 33 | ${ca.cc:'2'} 34 | ${ca.jc:'10'} 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /yarn/yarn/custom-amservice/src/main/resources/container-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 38 | 39 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /yarn/yarn/custom-amservice/src/test/resources/org/springframework/yarn/examples/CustomAmserviceTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | ${ca.cc:'2'} 27 | 
${ca.jc:'10'} 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /yarn/yarn/kill-application/README.md: -------------------------------------------------------------------------------- 1 | Spring Yarn Kill Application Example 2 | ==================================== 3 | 4 | This example demonstrates the use of Spring Yarn functionality to 5 | kill an running application. 6 | 7 | To test this example: 8 | 9 | # gradlew clean :yarn-examples-common:yarn-examples-kill-application:build 10 | 11 | To run this example against local Hadoop cluster: 12 | 13 | # gradlew -q run-yarn-examples-kill-application 14 | 15 | To run this example against remote Hadoop cluster: 16 | 17 | # gradlew -q run-yarn-examples-kill-application -Dhd.fs=hdfs://192.168.223.170:8020 -Dhd.rm=192.168.223.170:8032 -Dlocalresources.remote=hdfs://192.168.223.170:8020 18 | 19 | To run this example against local Hadoop cluster and not automatically killing the application: 20 | 21 | # gradlew -q run-yarn-examples-kill-application -Pnokill=true 22 | 23 | To run this example against local Hadoop cluster and kill existing application. You can get the application id 24 | either from Hadoop Resource Manager ui or using list-applications example: 25 | 26 | # gradlew -q run-yarn-examples-kill-application -Pappid=application_1377856222179_0008 27 | 28 | # Details 29 | 30 | This example launches an simple application with a container whose only task 31 | is to sleep sleep 2 minutes and log a message if Application wasn't killed. 32 | Making sure application is killed successfully, check either Hadoop scheduler GUI 33 | or *Container.stdout* file without a log message 34 | "Hello from KillApplicationContainer, it seems I wasn't killed.". 35 | 36 | This quick command set assumes local hadoop cluster with default settings. 
37 | 38 | # gradlew :yarn-examples-common:yarn-examples-kill-application:build -x test 39 | # hdfs dfs -rm -R /app /lib 40 | # hdfs dfs -mkdir -p /app/kill-application /lib 41 | # hdfs dfs -copyFromLocal yarn/kill-application/build/libs/* /app/kill-application 42 | # hdfs dfs -copyFromLocal yarn/kill-application/build/dependency-libs/* /lib 43 | # gradlew -q run-yarn-examples-kill-application 44 | 45 | # gradlew -q run-yarn-examples-kill-application -Pnokill=true 46 | # gradlew -q run-yarn-examples-list-applications 47 | # gradlew -q run-yarn-examples-kill-application -Pappid= 48 | -------------------------------------------------------------------------------- /yarn/yarn/kill-application/src/main/java/org/springframework/yarn/examples/KillApplicationContainer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import java.util.Map; 19 | import java.util.Properties; 20 | 21 | import org.apache.commons.logging.Log; 22 | import org.apache.commons.logging.LogFactory; 23 | import org.springframework.yarn.container.YarnContainer; 24 | 25 | /** 26 | * Container which just sleeps. 
27 | * 28 | * @author Janne Valkealahti 29 | * 30 | */ 31 | public class KillApplicationContainer implements YarnContainer { 32 | 33 | private static final Log log = LogFactory.getLog(KillApplicationContainer.class); 34 | 35 | @Override 36 | public void run() { 37 | log.info("Hello from KillApplicationContainer, gonna sleep next 2 minutes..."); 38 | for(int i = 0; i<24; i++) { 39 | log.info("Waiting to get killed"); 40 | try { 41 | Thread.sleep(5000); 42 | } catch (InterruptedException e) { 43 | } 44 | } 45 | log.info("Hello from KillApplicationContainer, it seems I wasn't killed."); 46 | } 47 | 48 | @Override 49 | public void setEnvironment(Map environment) { 50 | } 51 | 52 | @Override 53 | public void setParameters(Properties parameters) { 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /yarn/yarn/kill-application/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | fs.defaultFS=${hd.fs} 15 | yarn.resourcemanager.address=${hd.rm} 16 | fs.hdfs.impl=org.apache.hadoop.hdfs.DistributedFileSystem 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | ./* 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /yarn/yarn/kill-application/src/main/resources/appmaster-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | fs.defaultFS=${SHDP_HD_FS} 22 | yarn.resourcemanager.address=${SHDP_HD_RM} 23 | yarn.resourcemanager.scheduler.address=${SHDP_HD_SCHEDULER} 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /yarn/yarn/kill-application/src/main/resources/container-context.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /yarn/yarn/kill-application/src/test/java/org/springframework/yarn/examples/KillApplicationTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013-2015 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import static org.hamcrest.CoreMatchers.is; 19 | import static org.hamcrest.CoreMatchers.notNullValue; 20 | import static org.hamcrest.MatcherAssert.assertThat; 21 | 22 | import java.util.concurrent.TimeUnit; 23 | 24 | import org.apache.hadoop.yarn.api.records.ApplicationId; 25 | import org.apache.hadoop.yarn.api.records.YarnApplicationState; 26 | import org.junit.Test; 27 | import org.springframework.test.annotation.Timed; 28 | import org.springframework.test.context.ContextConfiguration; 29 | import org.springframework.yarn.test.context.MiniYarnCluster; 30 | import org.springframework.yarn.test.context.YarnDelegatingSmartContextLoader; 31 | import org.springframework.yarn.test.junit.AbstractYarnClusterTests; 32 | 33 | /** 34 | * Tests for kill application example. 
35 | * 36 | * @author Janne Valkealahti 37 | * 38 | */ 39 | @ContextConfiguration(loader=YarnDelegatingSmartContextLoader.class) 40 | @MiniYarnCluster 41 | public class KillApplicationTests extends AbstractYarnClusterTests { 42 | 43 | @Test 44 | @Timed(millis=150000) 45 | public void testAppSubmission() throws Exception { 46 | ApplicationId applicationId = submitApplication(); 47 | YarnApplicationState state = waitState(applicationId, 120, TimeUnit.SECONDS, YarnApplicationState.RUNNING); 48 | assertThat(state, notNullValue()); 49 | getYarnClient().killApplication(applicationId); 50 | state = waitState(applicationId, 20, TimeUnit.SECONDS, YarnApplicationState.KILLED); 51 | assertThat(state, notNullValue()); 52 | assertThat(state, is(YarnApplicationState.KILLED)); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /yarn/yarn/kill-application/src/test/resources/org/springframework/yarn/examples/KillApplicationTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /yarn/yarn/list-applications/README.md: -------------------------------------------------------------------------------- 1 | Spring Yarn List Applications Example 2 | ===================================== 3 | 4 | This example demonstrates the use of Spring Yarn functionality to 5 | use Client and list running applications. 
6 | 7 | To test this example: 8 | 9 | # gradlew clean :yarn-examples-common:yarn-examples-list-applications:build 10 | 11 | To run this example against local Hadoop cluster: 12 | 13 | # gradlew -q run-yarn-examples-list-applications 14 | 15 | To run this example against remote Hadoop cluster: 16 | 17 | # gradlew -q run-yarn-examples-list-applications -Dhd.rm=192.168.223.170:8032 18 | 19 | # Details 20 | 21 | With a successful execution you should see something like this 22 | printed in a console: 23 | 24 | ``` 25 | Id User Name Queue StartTime FinishTime State FinalStatus 26 | ------------------------------ -------- ---------------- ------- ---------------- ---------------- -------- ----------- 27 | application_1377856222179_0005 jvalkeal kill-application default 8/30/13 12:20 PM 8/30/13 12:20 PM KILLED KILLED 28 | application_1377856222179_0001 jvalkeal multi-context default 8/30/13 10:57 AM 8/30/13 10:58 AM FINISHED SUCCEEDED 29 | application_1377856222179_0009 jvalkeal kill-application default 8/30/13 12:57 PM 8/30/13 12:58 PM KILLED KILLED 30 | application_1377856222179_0002 jvalkeal multi-context default 8/30/13 11:02 AM 8/30/13 11:03 AM FINISHED SUCCEEDED 31 | ``` 32 | 33 | # Quick Instructions 34 | 35 | This quick command set assumes local hadoop cluster with default settings. 
36 | 37 | # gradlew -q run-yarn-examples-list-applications 38 | -------------------------------------------------------------------------------- /yarn/yarn/list-applications/src/main/java/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spring-attic/spring-hadoop-samples/18e6641b2ae6830753abc9b0e7a3e45ade814e0e/yarn/yarn/list-applications/src/main/java/.gitignore -------------------------------------------------------------------------------- /yarn/yarn/list-applications/src/main/java/org/springframework/yarn/examples/Main.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.springframework.yarn.YarnSystemConstants; 19 | import org.springframework.yarn.client.CommandLineClientRunner; 20 | 21 | /** 22 | * Main class for list-applications example. 
23 | * 24 | * @author Janne Valkealahti 25 | * 26 | */ 27 | public class Main extends CommandLineClientRunner { 28 | 29 | public static void main(String args[]) { 30 | new Main().doMain(new String[] { 31 | YarnSystemConstants.DEFAULT_CONTEXT_FILE_CLIENT, 32 | YarnSystemConstants.DEFAULT_ID_CLIENT, 33 | CommandLineClientRunner.OPT_LIST 34 | }); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /yarn/yarn/list-applications/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | fs.defaultFS=${hd.fs} 15 | yarn.resourcemanager.address=${hd.rm} 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /yarn/yarn/list-applications/src/test/java/org/springframework/yarn/examples/ListApplicationsTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import static org.hamcrest.Matchers.notNullValue; 19 | import static org.junit.Assert.assertThat; 20 | 21 | import java.util.List; 22 | 23 | import org.apache.hadoop.yarn.api.records.ApplicationReport; 24 | import org.junit.Test; 25 | import org.springframework.test.context.ContextConfiguration; 26 | import org.springframework.yarn.test.context.MiniYarnCluster; 27 | import org.springframework.yarn.test.context.YarnDelegatingSmartContextLoader; 28 | import org.springframework.yarn.test.junit.AbstractYarnClusterTests; 29 | 30 | /** 31 | * Tests for list applications example. 32 | * 33 | * @author Janne Valkealahti 34 | * 35 | */ 36 | @ContextConfiguration(loader=YarnDelegatingSmartContextLoader.class) 37 | @MiniYarnCluster 38 | public class ListApplicationsTests extends AbstractYarnClusterTests { 39 | 40 | @Test 41 | public void testClient() throws Exception { 42 | 43 | // just testing that we get no exceptions 44 | List applications = getYarnClient().listApplications(); 45 | assertThat(applications, notNullValue()); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /yarn/yarn/list-applications/src/test/resources/org/springframework/yarn/examples/ListApplicationsTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /yarn/yarn/multi-context/README.md: -------------------------------------------------------------------------------- 1 | Spring Yarn Multi Context Example 2 | ================================= 3 | 4 | This example demonstrates the use of Spring Yarn functionality to 5 | launch a simple application with multiple containers. 
6 | 7 | To test this example: 8 | 9 | # gradlew clean :yarn-examples-common:yarn-examples-multi-context:build 10 | 11 | To run this example against local Hadoop cluster: 12 | 13 | # gradlew -q run-yarn-examples-multi-context 14 | 15 | To run this example against remote Hadoop cluster: 16 | 17 | # gradlew -q run-yarn-examples-multi-context -Dhd.fs=hdfs://192.168.223.170:8020 -Dhd.rm=192.168.223.170:8032 -Dlocalresources.remote=hdfs://192.168.223.170:8020 18 | 19 | # Details 20 | 21 | This example demonstrates how a simple custom container can be created. 22 | Example launches 4 containers and each container will simply log 23 | "Hello from MultiContextContainer" and exit. 24 | 25 | # Quick Instructions 26 | 27 | This quick command set assumes local hadoop cluster with default settings. 28 | 29 | # gradlew :yarn-examples-common:yarn-examples-multi-context:build -x test 30 | # hdfs dfs -rm -R /app /lib 31 | # hdfs dfs -mkdir -p /app/multi-context /lib 32 | # hdfs dfs -copyFromLocal yarn/multi-context/build/libs/* /app/multi-context 33 | # hdfs dfs -copyFromLocal yarn/multi-context/build/dependency-libs/* /lib 34 | # gradlew -q run-yarn-examples-multi-context 35 | 36 | -------------------------------------------------------------------------------- /yarn/yarn/multi-context/src/main/java/org/springframework/yarn/examples/ContainerConfiguration.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.springframework.context.annotation.Configuration; 19 | import org.springframework.yarn.config.annotation.EnableYarn; 20 | import org.springframework.yarn.config.annotation.EnableYarn.Enable; 21 | import org.springframework.yarn.config.annotation.SpringYarnConfigurerAdapter; 22 | import org.springframework.yarn.config.annotation.builders.YarnContainerConfigurer; 23 | 24 | @Configuration 25 | @EnableYarn(enable=Enable.CONTAINER) 26 | public class ContainerConfiguration extends SpringYarnConfigurerAdapter { 27 | 28 | @Override 29 | public void configure(YarnContainerConfigurer container) throws Exception { 30 | container 31 | .containerClass(MultiContextContainer.class); 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /yarn/yarn/multi-context/src/main/java/org/springframework/yarn/examples/MultiContextContainer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import java.util.Map; 19 | import java.util.Properties; 20 | 21 | import org.apache.commons.logging.Log; 22 | import org.apache.commons.logging.LogFactory; 23 | import org.springframework.yarn.container.YarnContainer; 24 | 25 | public class MultiContextContainer implements YarnContainer { 26 | 27 | private static final Log log = LogFactory.getLog(MultiContextContainer.class); 28 | 29 | @Override 30 | public void run() { 31 | log.info("Hello from MultiContextContainer"); 32 | } 33 | 34 | @Override 35 | public void setEnvironment(Map environment) { 36 | } 37 | 38 | @Override 39 | public void setParameters(Properties parameters) { 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /yarn/yarn/multi-context/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | fs.defaultFS=${hd.fs} 16 | yarn.resourcemanager.address=${hd.rm} 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 4 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /yarn/yarn/multi-context/src/main/resources/appmaster-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /yarn/yarn/multi-context/src/main/resources/container-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /yarn/yarn/multi-context/src/test/java/org/springframework/yarn/examples/MultiContextXmlConfigTests.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.springframework.beans.factory.annotation.Autowired; 19 | import org.springframework.test.context.ContextConfiguration; 20 | import org.springframework.yarn.test.context.MiniYarnCluster; 21 | import org.springframework.yarn.test.context.YarnDelegatingSmartContextLoader; 22 | 23 | /** 24 | * Use xml config. 
25 | * 26 | * @author Janne Valkealahti 27 | * 28 | */ 29 | @ContextConfiguration(loader=YarnDelegatingSmartContextLoader.class) 30 | @MiniYarnCluster 31 | public class MultiContextXmlConfigTests extends MultiContextTests { 32 | 33 | @Autowired 34 | public void setConfiguration(org.apache.hadoop.conf.Configuration configuration) { 35 | super.setConfiguration(configuration); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /yarn/yarn/multi-context/src/test/resources/org/springframework/yarn/examples/MultiContextXmlConfigTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 4 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /yarn/yarn/restart-context/README.md: -------------------------------------------------------------------------------- 1 | Spring Yarn Restart Context Example 2 | =================================== 3 | 4 | This example demonstrates the use of Spring Yarn functionality to 5 | use custom application master and container handling restart 6 | of failed container. 7 | 8 | To test this example: 9 | 10 | # gradlew clean :yarn-examples-common:yarn-examples-restart-context:build 11 | 12 | To run this example against local Hadoop cluster: 13 | 14 | # gradlew -q run-yarn-examples-restart-context 15 | 16 | To run this example against remote Hadoop cluster: 17 | 18 | # gradlew -q run-yarn-examples-restart-context -Dhd.fs=hdfs://192.168.223.170:8020 -Dhd.rm=192.168.223.170:8032 -Dlocalresources.remote=hdfs://192.168.223.170:8020 19 | 20 | # Details 21 | 22 | This example demonstrates how a failed container with abnormal 23 | exit code can be handled in custom application master and 24 | request a new container launch. 
25 | 26 | # Quick Instructions 27 | 28 | This quick command set assumes local hadoop cluster with default settings. 29 | 30 | # gradlew :yarn-examples-common:yarn-examples-restart-context:build -x test 31 | # hdfs dfs -rm -R /app /lib 32 | # hdfs dfs -mkdir -p /app/restart-context /lib 33 | # hdfs dfs -copyFromLocal yarn/restart-context/build/libs/* /app/restart-context 34 | # hdfs dfs -copyFromLocal yarn/restart-context/build/dependency-libs/* /lib 35 | # gradlew -q run-yarn-examples-restart-context 36 | -------------------------------------------------------------------------------- /yarn/yarn/restart-context/src/main/java/org/springframework/yarn/examples/FailingContextContainer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.apache.commons.logging.Log; 19 | import org.apache.commons.logging.LogFactory; 20 | import org.springframework.yarn.YarnSystemConstants; 21 | import org.springframework.yarn.container.AbstractYarnContainer; 22 | import org.springframework.yarn.container.YarnContainer; 23 | 24 | /** 25 | * Simple {@link YarnContainer} example which is able 26 | * to exist with error status to demonstrate how it is 27 | * handled on Application Master. 
28 | * 29 | * @author Janne Valkealahti 30 | * 31 | */ 32 | public class FailingContextContainer extends AbstractYarnContainer { 33 | 34 | private static final Log log = LogFactory.getLog(FailingContextContainer.class); 35 | 36 | @Override 37 | protected void runInternal() { 38 | log.info("Hello from FailingContextContainer"); 39 | 40 | // just get the last number from "container_1377159599818_0001_01_000002" 41 | String containerIdString = getEnvironment(YarnSystemConstants.SYARN_CONTAINER_ID); 42 | int containerId = Integer.parseInt(containerIdString.substring(containerIdString.length()-1)); 43 | 44 | // We just use the container id found from token variable 45 | // to fail first container (with id 2) 46 | if ((containerId == 2)) { 47 | log.info("Exiting with error"); 48 | System.exit(1); 49 | } else { 50 | log.info("Exiting with ok"); 51 | System.exit(0); 52 | } 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /yarn/yarn/restart-context/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | fs.defaultFS=${hd.fs} 15 | yarn.resourcemanager.address=${hd.rm} 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /yarn/yarn/restart-context/src/main/resources/appmaster-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | fs.defaultFS=${SHDP_HD_FS} 17 | yarn.resourcemanager.address=${SHDP_HD_RM} 18 | yarn.resourcemanager.scheduler.address=${SHDP_HD_SCHEDULER} 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /yarn/yarn/restart-context/src/main/resources/container-context.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /yarn/yarn/restart-context/src/test/resources/org/springframework/yarn/examples/RestartContextTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | fs.defaultFS=${hd.fs} 15 | yarn.resourcemanager.address=${hd.rm} 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /yarn/yarn/simple-command/README.md: -------------------------------------------------------------------------------- 1 | Spring Yarn Simple Command Example 2 | ================================== 3 | 4 | This example demonstrates the use of Spring Yarn functionality to 5 | launch a simple application with multiple containers. 6 | 7 | To test this example: 8 | 9 | # gradlew clean :yarn-examples-common:yarn-examples-simple-command:build 10 | 11 | To run this example against local Hadoop cluster: 12 | 13 | # gradlew -q run-yarn-examples-simple-command 14 | 15 | To run this example against remote Hadoop cluster: 16 | 17 | # gradlew -q run-yarn-examples-simple-command -Dhd.fs=hdfs://192.168.223.170:8020 -Dhd.rm=192.168.223.170:8032 -Dlocalresources.remote=hdfs://192.168.223.170:8020 18 | 19 | # Details 20 | 21 | This example demonstrates how a simple custom container can be created. 22 | Example launches 4 containers and each container will simply log 23 | an output of *date* command and exit. 24 | 25 | This is one of the most simplistic examples to run something on Yarn. 26 | Containers which are launched are simply an placeholder to run commands 27 | on OS. 28 | 29 | # Quick Instructions 30 | 31 | This quick command set assumes local hadoop cluster with default settings. 
32 | 33 | # gradlew :yarn-examples-common:yarn-examples-simple-command:build -x test 34 | # hdfs dfs -rm -R /app /lib 35 | # hdfs dfs -mkdir -p /app/simple-command /lib 36 | # hdfs dfs -copyFromLocal yarn/simple-command/build/libs/* /app/simple-command 37 | # hdfs dfs -copyFromLocal yarn/simple-command/build/dependency-libs/* /lib 38 | # gradlew -q run-yarn-examples-simple-command 39 | -------------------------------------------------------------------------------- /yarn/yarn/simple-command/src/main/java/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/spring-attic/spring-hadoop-samples/18e6641b2ae6830753abc9b0e7a3e45ade814e0e/yarn/yarn/simple-command/src/main/java/.gitignore -------------------------------------------------------------------------------- /yarn/yarn/simple-command/src/main/resources/application-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | fs.defaultFS=${hd.fs} 16 | yarn.resourcemanager.address=${hd.rm} 17 | fs.hdfs.impl=org.apache.hadoop.hdfs.DistributedFileSystem 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 4 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /yarn/yarn/simple-command/src/main/resources/appmaster-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | fs.defaultFS=${SHDP_HD_FS} 18 | yarn.resourcemanager.address=${SHDP_HD_RM} 19 | yarn.resourcemanager.scheduler.address=${SHDP_HD_SCHEDULER} 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | ./* 30 | 31 | 32 | 33 | 34 | 35 | 36 | /Container.stdout 39 | 2>/Container.stderr 40 | ]]> 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- 
/yarn/yarn/simple-command/src/test/resources/org/springframework/yarn/examples/SimpleCommandTests-context.xml: -------------------------------------------------------------------------------- 1 | 2 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 4 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /yarn/yarn/src/main/java/org/springframework/yarn/examples/CommonMain.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2013 the original author or authors. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package org.springframework.yarn.examples; 17 | 18 | import org.springframework.yarn.YarnSystemConstants; 19 | import org.springframework.yarn.client.CommandLineClientRunner; 20 | 21 | /** 22 | * Main class for examples submitting application. 
23 | * 24 | * @author Janne Valkealahti 25 | * 26 | */ 27 | public class CommonMain extends CommandLineClientRunner { 28 | 29 | public static void main(String args[]) { 30 | new CommonMain().doMain(new String[] { 31 | YarnSystemConstants.DEFAULT_CONTEXT_FILE_CLIENT, 32 | YarnSystemConstants.DEFAULT_ID_CLIENT, 33 | CommandLineClientRunner.OPT_SUBMIT 34 | }); 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /yarn/yarn/src/main/resources/hadoop.properties: -------------------------------------------------------------------------------- 1 | hd.fs=hdfs://localhost:8020 2 | hd.rm=localhost:8032 3 | hd.scheduler=localhost:8030 4 | -------------------------------------------------------------------------------- /yarn/yarn/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootCategory=WARN, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d %p [%C{1}] - %m%n 6 | 7 | log4j.category.org.springframework.yarn=DEBUG 8 | log4j.category.org.springframework.data.hadoop=DEBUG 9 | 10 | --------------------------------------------------------------------------------