├── .gitignore ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── bin ├── README ├── create_schema.rb ├── encode_job_id.rb ├── etl │ ├── hraven-etl-env.sh │ ├── hraven-etl.sh │ ├── jobFileLoader.sh │ ├── jobFilePreprocessor.sh │ ├── jobFileProcessor.sh │ ├── pidfiles.sh │ └── processingRecordsPrinter.sh ├── find_framework.rb ├── find_partial_jobs.rb ├── find_partial_raw.rb ├── get_flow.rb ├── get_flow_events.rb ├── get_flow_stats.rb ├── get_job.rb ├── get_raw.rb ├── hraven ├── hraven-daemon.sh └── job_level_statistics.rb ├── conf ├── hraven-env.sh ├── log4j.properties └── sampleCostDetails.properties ├── dev-support └── hraven_eclipse_formatter.xml ├── hraven-assembly ├── pom.xml └── src │ └── main │ └── assembly │ └── all.xml ├── hraven-core ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── twitter │ │ └── hraven │ │ ├── AggregationConstants.java │ │ ├── AppAggregationKey.java │ │ ├── AppKey.java │ │ ├── AppSummary.java │ │ ├── ClientObjectMapper.java │ │ ├── Cluster.java │ │ ├── Constants.java │ │ ├── Counter.java │ │ ├── CounterMap.java │ │ ├── Flow.java │ │ ├── FlowEvent.java │ │ ├── FlowEventKey.java │ │ ├── FlowKey.java │ │ ├── FlowQueueKey.java │ │ ├── Framework.java │ │ ├── HadoopVersion.java │ │ ├── HdfsConstants.java │ │ ├── HdfsStats.java │ │ ├── HdfsStatsKey.java │ │ ├── HravenResponseMetrics.java │ │ ├── JobDesc.java │ │ ├── JobDescFactory.java │ │ ├── JobDescFactoryBase.java │ │ ├── JobDetails.java │ │ ├── JobHistoryKeys.java │ │ ├── JobId.java │ │ ├── JobKey.java │ │ ├── MRJobDescFactory.java │ │ ├── PigJobDescFactory.java │ │ ├── QualifiedJobId.java │ │ ├── QualifiedPathKey.java │ │ ├── Range.java │ │ ├── ScaldingJobDescFactory.java │ │ ├── TaskDetails.java │ │ ├── TaskKey.java │ │ ├── datasource │ │ ├── AppAggregationKeyConverter.java │ │ ├── AppSummaryService.java │ │ ├── AppVersionService.java │ │ ├── ByteConverter.java │ │ ├── DataException.java │ │ ├── FlowEventKeyConverter.java │ │ ├── FlowEventService.java │ │ ├── 
FlowKeyConverter.java │ │ ├── FlowQueueKeyConverter.java │ │ ├── FlowQueueService.java │ │ ├── HdfsStatsKeyConverter.java │ │ ├── HdfsStatsService.java │ │ ├── JobHistoryByIdService.java │ │ ├── JobHistoryRawService.java │ │ ├── JobHistoryService.java │ │ ├── JobIdConverter.java │ │ ├── JobKeyConverter.java │ │ ├── MissingColumnInResultException.java │ │ ├── ProcessingException.java │ │ ├── QualifiedJobIdConverter.java │ │ ├── RowKeyParseException.java │ │ ├── RunMatchFilter.java │ │ ├── TaskKeyConverter.java │ │ └── VersionInfo.java │ │ ├── rest │ │ ├── HravenRestServer.java │ │ ├── ObjectMapperProvider.java │ │ ├── PaginatedResult.java │ │ ├── RestJSONResource.java │ │ ├── RestResource.java │ │ ├── RestServer.java │ │ ├── SerializationContext.java │ │ └── client │ │ │ ├── HRavenRestClient.java │ │ │ └── UrlDataLoader.java │ │ └── util │ │ ├── BatchUtil.java │ │ ├── ByteArrayWrapper.java │ │ ├── ByteUtil.java │ │ ├── DateUtil.java │ │ ├── HadoopConfUtil.java │ │ ├── JSONUtil.java │ │ └── StringUtil.java │ └── test │ ├── java │ └── com │ │ └── twitter │ │ └── hraven │ │ ├── AllTests.java │ │ ├── GenerateFlowTestData.java │ │ ├── TestAppKey.java │ │ ├── TestAppSummary.java │ │ ├── TestCounterMap.java │ │ ├── TestFlow.java │ │ ├── TestFlowKey.java │ │ ├── TestFramework.java │ │ ├── TestHadoopVersion.java │ │ ├── TestHdfsStatsKey.java │ │ ├── TestJobDescFactory.java │ │ ├── TestJobDescFactoryBase.java │ │ ├── TestJobDetails.java │ │ ├── TestJobHistoryKeys.java │ │ ├── TestJobId.java │ │ ├── TestJobKey.java │ │ ├── TestJsonSerde.java │ │ ├── TestMRJobDescFactory.java │ │ ├── TestPigJobDescFactory.java │ │ ├── TestQualifiedPathKey.java │ │ ├── TestScaldingJobDescFactory.java │ │ ├── TestTaskKey.java │ │ ├── datasource │ │ ├── HRavenTestUtil.java │ │ ├── TestAppSummaryService.java │ │ ├── TestAppVersionService.java │ │ ├── TestFlowEventService.java │ │ ├── TestFlowQueueKeyConverter.java │ │ ├── TestFlowQueueService.java │ │ ├── TestHdfStatsKeyConverter.java │ │ ├── 
TestHdfsStatsService.java │ │ ├── TestJobHistoryRawService.java │ │ └── TestJobHistoryService.java │ │ ├── rest │ │ └── TestPaginatedResult.java │ │ └── util │ │ ├── TestBatchUtil.java │ │ ├── TestByteArrayWrapper.java │ │ ├── TestByteUtil.java │ │ └── TestHadoopConfUtil.java │ └── resources │ ├── done │ └── something.example.com_1337787092259_job_201205231531_256984_userName1_App1 │ ├── job_1329348432655_0001_conf.xml │ ├── log4j.properties │ └── testhRavenClusters.properties ├── hraven-etl ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── twitter │ │ └── hraven │ │ ├── etl │ │ ├── FileLister.java │ │ ├── FileStatusModificationComparator.java │ │ ├── ImportException.java │ │ ├── JobFile.java │ │ ├── JobFileModifiedRangePathFilter.java │ │ ├── JobFilePartitioner.java │ │ ├── JobFilePathFilter.java │ │ ├── JobFilePreprocessor.java │ │ ├── JobFileProcessor.java │ │ ├── JobFileRawLoader.java │ │ ├── JobHistoryFileParser.java │ │ ├── JobHistoryFileParserBase.java │ │ ├── JobHistoryFileParserFactory.java │ │ ├── JobHistoryFileParserHadoop2.java │ │ ├── JobRunner.java │ │ ├── MinMaxJobFileTracker.java │ │ ├── ProcessRecord.java │ │ ├── ProcessRecordKey.java │ │ ├── ProcessRecordKeyConverter.java │ │ ├── ProcessRecordService.java │ │ ├── ProcessRecordUpdater.java │ │ ├── ProcessState.java │ │ └── ProcessingRecordsPrinter.java │ │ └── mapreduce │ │ ├── CombineFileInputFormat.java │ │ ├── HadoopCompat.java │ │ ├── JobFileRawLoaderMapper.java │ │ ├── JobFileTableMapper.java │ │ ├── ProcessingCounter.java │ │ └── RecordTypes.java │ └── test │ ├── java │ └── com │ │ └── twitter │ │ └── hraven │ │ ├── TestJobFile.java │ │ └── etl │ │ ├── TestFileLister.java │ │ ├── TestFileStatusModificationTimeComparator.java │ │ ├── TestJobHistoryFileParserBase.java │ │ ├── TestJobHistoryFileParserFactory.java │ │ ├── TestJobHistoryFileParserHadoop2.java │ │ └── TestProcessRecord.java │ └── resources │ ├── 
job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist │ ├── job_1329348432655_0001_conf.xml │ ├── job_1329348432999_0003-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist │ ├── job_1410289045532_259974-1411647985641-user35-SomeJobName-1411647999554-1-0-SUCCEEDED-root.someQueueName-1411647995323.jhist │ ├── job_201311192236_3583_1386370578196_user1_Sleep+job │ └── log4j.properties └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | # Exclude Eclipse files that can be generated with mvn eclipse:eclipse 2 | .classpath 3 | .project 4 | target/ 5 | hraven-core/.settings/ 6 | hraven-etl/.settings/ 7 | 8 | # Used as a local build/deploy dev-cycle script 9 | deploy.sh 10 | 11 | # Where Maven generates its output. 12 | */target/* 13 | */build/* 14 | *.idea/* 15 | *.iml 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | sudo: false 3 | script: umask 0022 && mvn clean test 4 | jdk: 5 | - oraclejdk8 6 | after_success: 7 | - mvn clean cobertura:cobertura coveralls:cobertura 8 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to hRaven 2 | 3 | Looking to contribute something to the hRaven? Here's how you can help. 4 | 5 | ## Bugs reports 6 | 7 | A bug is a _demonstrable problem_ that is caused by the code in the 8 | repository. Good bug reports are extremely helpful - thank you! 9 | 10 | Guidelines for bug reports: 11 | 12 | 1. **Use the GitHub issue search** - check if the issue has already been 13 | reported. 14 | 15 | 2. **Check if the issue has been fixed** - try to reproduce it using the 16 | latest `master` or development branch in the repository. 
17 | 18 | 3. **Isolate the problem** - ideally create a reduced test case and a live 19 | example. 20 | 21 | 4. Please try to be as detailed as possible in your report. Include specific 22 | information about the environment - operating system and version, versions 23 | of Hadoop and HBase, version of hRaven - and steps required to reproduce 24 | the issue. 25 | 26 | 27 | ## Feature requests & contribution enquiries 28 | 29 | Feature requests are welcome. But take a moment to find out whether your idea 30 | fits with the scope and aims of the project. It's up to *you* to make a strong 31 | case for the inclusion of your feature. Please provide as much detail and 32 | context as possible. 33 | 34 | Contribution enquiries should take place before any significant pull request, 35 | otherwise you risk spending a lot of time working on something that we might 36 | have good reasons for rejecting. 37 | 38 | 39 | ## Pull requests 40 | 41 | Good pull requests - patches, improvements, new features - are a fantastic 42 | help. They should remain focused in scope and avoid containing unrelated 43 | commits. 44 | 45 | Make sure to adhere to the coding conventions used throughout the codebase 46 | (indentation, accurate comments, etc.) and any other requirements (such as test 47 | coverage). 48 | 49 | Please follow this process; it's the best way to get your work included in the 50 | project: 51 | 52 | 1. Create a new topic branch to contain your feature, change, or fix: 53 | 54 | 2. Commit your changes in logical chunks. Provide clear and explanatory commit 55 | messages. Use git's [interactive rebase](https://help.github.com/articles/interactive-rebase) 56 | feature to tidy up your commits before making them public. 57 | 58 | 3. Locally merge (or rebase) the upstream development branch into your topic branch: 59 | 60 | 4. Push your topic branch up to your fork: 61 | 62 | 5. 
[Open a Pull Request](http://help.github.com/send-pull-requests/) with a 63 | clear title and description. 64 | 65 | ## License 66 | 67 | By contributing your code, 68 | 69 | You agree to license your contribution under the terms of the Apache Public License 2.0 70 | https://github.com/twitter/hraven/blob/master/LICENSE 71 | -------------------------------------------------------------------------------- /bin/README: -------------------------------------------------------------------------------- 1 | # Copyright 2013 Twitter, Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | Running JRuby Scripts 16 | ------------------------------ 17 | JRuby scripts in this directory make use of either the hRaven java APIs 18 | or the HBase client java API directly. 19 | 20 | To run the scripts: 21 | 22 | 1) Make sure the HRAVEN_CLASSPATH environment variable is set to include hbase, 23 | zookeeper and Hadoop jars. 
For example by running: 24 | 25 | export HRAVEN_CLASSPATH=`hbase classpath` 26 | 27 | 2) Run the ruby scripts from the bin directory: 28 | cd hraven/bin 29 | ./script_name.rb 30 | 31 | 3) Or to run the script from a different directory you'll have to pass 32 | org.jruby.Main class and the qualified path to the ruby script: 33 | somepath/hraven/bin/hraven org.jruby.Main somepath/hraven/bin/script_name.rb 34 | 35 | 36 | 4) When you want to connect to a different HBase cluster, set the HBASE_CONF_DIR 37 | environment variable to the desired HBase configuration: 38 | 39 | export HBASE_CONF_DIR=/etc/hbase/conf-hbase-dc1 40 | cd hraven/bin 41 | ./script_name.rb 42 | 43 | or all on one line without exporting the environment variable: 44 | 45 | cd hraven/bin 46 | HBASE_CONF_DIR=/etc/hbase/conf-hbase-dc1 ./script_name.rb 47 | 48 | 49 | Using the HBase Shell 50 | ------------------------------ 51 | The HBase shell for a specific cluster can be launched by pointing to 52 | the cluster's configuration, ie. to run a shell on the test cluster: 53 | 54 | hbase --config /etc/hbase/conf-hbase-dc1 shell 55 | 56 | 57 | Since many of the table row keys contain encoded versions of the job 58 | ID, these can be difficult to access from the shell. To assist with 59 | generating the encoded job IDs to use in the shell, run the script: 60 | 61 | ./encode_job_id.rb [cluster] jobid 62 | 63 | For example: 64 | 65 | $ ./encode_job_id.rb job_201204041958_222793 66 | \x00\x00\x00.\xD8\xB2\x08\xE6\x00\x00\x00\x00\x00\x03fI 67 | 68 | Then copy and paste the result, for use in shell commands. 
When using 69 | the byte encoded representation, the field _must_ be enclosed in double 70 | quotes to be handled correct in the JRuby interpreter that runs the 71 | HBase shell: 72 | 73 | > get 'dev.job_history_raw', "cluster@dc1!\x00\x00\x00.\xD8\xB2\x08\xE6\x00\x00\x00\x00\x00\x03fI" 74 | -------------------------------------------------------------------------------- /bin/create_schema.rb: -------------------------------------------------------------------------------- 1 | #!./hraven org.jruby.Main 2 | 3 | # 4 | # Copyright 2013 Twitter, Inc. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # 18 | # Create all hRaven tables in HBase 19 | # 20 | # Run this script using the HBase "shell" command: 21 | # 22 | # hbase [--config /path/to/hbase/conf] shell bin/create_schema.rb 23 | # 24 | create 'job_history', {NAME => 'i', COMPRESSION => 'LZO'} 25 | 26 | create 'job_history_task', {NAME => 'i', COMPRESSION => 'LZO'} 27 | 28 | # job_history (indexed) by jobId table contains 1 column family: 29 | # i: job-level information specifically the rowkey into the job_history table 30 | create 'job_history-by_jobId', {NAME => 'i', COMPRESSION => 'LZO'} 31 | 32 | # job_history_app_version - stores all version numbers seen for a single app ID 33 | # i: "info" -- version information 34 | create 'job_history_app_version', {NAME => 'i', COMPRESSION => 'LZO'} 35 | 36 | create 'job_history_raw', {NAME => 'i', COMPRESSION => 'LZO', BLOOMFILTER => 'ROWCOL'}, 37 | {NAME => 'r', VERSIONS => 1, COMPRESSION => 'LZO', BLOCKCACHE => false} 38 | 39 | # job_history_process - stores metadata about job history data loading process 40 | # i: "info" -- process information 41 | create 'job_history_process', {NAME => 'i', VERSIONS => 10, COMPRESSION => 'LZO'} 42 | 43 | # flow_queue - stores reference to each flow ID running on a cluster, reverse timestamp ordered 44 | create 'flow_queue', {NAME => 'i', VERSIONS => 3, COMPRESSION => 'LZO', BLOOMFILTER => 'ROW'} 45 | 46 | # flow_event - stores events fired during pig job execution 47 | create 'flow_event', {NAME => 'i', VERSIONS => 3, COMPRESSION => 'LZO', BLOOMFILTER => 'ROW'} 48 | 49 | # job_history_agg_daily - stores daily aggregated job info 50 | # the s column family has a TTL of 30 days, it's used as a scratch col family 51 | # it stores the run ids that are seen for that day 52 | # we assume that a flow will not run for more than 30 days, hence it's fine to "expire" that data 53 | create 'job_history_agg_daily', {NAME => 'i', COMPRESSION => 'LZO', BLOOMFILTER => 'ROWCOL'}, 54 | {NAME => 's', VERSIONS => 1, COMPRESSION => 'LZO', BLOCKCACHE => false, 
TTL => '2592000'} 55 | 56 | # job_history_agg_weekly - stores weekly aggregated job info 57 | # the s column family has a TTL of 30 days 58 | # it stores the run ids that are seen for that week 59 | # we assume that a flow will not run for more than 30 days, hence it's fine to "expire" that data 60 | create 'job_history_agg_weekly', {NAME => 'i', COMPRESSION => 'LZO', BLOOMFILTER => 'ROWCOL'}, 61 | {NAME => 's', VERSIONS => 1, COMPRESSION => 'LZO', BLOCKCACHE => false, TTL => '2592000'} 62 | 63 | exit 64 | -------------------------------------------------------------------------------- /bin/encode_job_id.rb: -------------------------------------------------------------------------------- 1 | #!./hraven org.jruby.Main 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # 19 | # Show the byte respresentation for a job ID, which is comprised of a cluster identifier and a jobid. 
20 | # 21 | # Execute this script from the bin directory like this: 22 | # hraven/bin$ ./encode_job_id.rb [cluster] jobid 23 | # 24 | # Or from anywhere like this: 25 | # hraven$ bin/hraven org.jruby.Main bin/encode_job_id.rb [cluster] jobid 26 | 27 | include Java 28 | 29 | import com.twitter.hraven.datasource.JobIdConverter 30 | import com.twitter.hraven.datasource.QualifiedJobIdConverter 31 | import com.twitter.hraven.JobId 32 | import com.twitter.hraven.QualifiedJobId 33 | 34 | import org.apache.hadoop.hbase.util.Bytes 35 | 36 | if ARGV.length == 2 37 | id = QualifiedJobId.new(ARGV[0], ARGV[1]) 38 | puts Bytes.toStringBinary(QualifiedJobIdConverter.new().toBytes(id)) 39 | elsif ARGV.length == 1 40 | id = JobId.new(ARGV[0]) 41 | puts Bytes.toStringBinary(JobIdConverter.new().toBytes(id)) 42 | else 43 | puts "Usage: encode_job_id.rb [cluster] jobid" 44 | exit 1 45 | end 46 | -------------------------------------------------------------------------------- /bin/etl/hraven-etl-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # Used to configure hraven-etl environment 18 | 19 | home=$(dirname $0) 20 | source $home/../../conf/hraven-env.sh 21 | source $home/pidfiles.sh 22 | 23 | #check if hraven-core.jar and hraven-etl.jar exist 24 | #if not, create symbolic links to the needed jars 25 | libhraven=`cd $(dirname $0)/../../lib;pwd;` 26 | if [ ! -f $libhraven/hraven-core.jar ]; then 27 | coreLastVersion=`ls $libhraven/hraven-core-*.jar | sort -V | tail -1` 28 | ln -s $coreLastVersion $libhraven/hraven-core.jar 29 | echo "hraven-core.jar linked to $coreLastVersion" 30 | fi 31 | if [ ! -f $libhraven/hraven-etl.jar ]; then 32 | etlLastVersion=`ls $libhraven/hraven-etl-*.jar | sort -V | tail -1` 33 | ln -s $etlLastVersion $libhraven/hraven-etl.jar 34 | echo "hraven-etl.jar linked to $etlLastVersion" 35 | fi 36 | 37 | # set the hraven-core jar as part of libjars and hadoop classpath 38 | # set this here because it only pertains to the etl logic 39 | export LIBJARS=$home/../../lib/hraven-core.jar 40 | export HADOOP_CLASSPATH=$home/../../lib/*:$LIBJARS:$HBASE_CLASSPATH 41 | hravenEtlJar=$home/../../lib/hraven-etl.jar 42 | -------------------------------------------------------------------------------- /bin/etl/hraven-etl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | 18 | # Used to pre-process unprocessed files in the /hadoop/mapred/history/done directory 19 | # For each batch of jobFiles, write a sequence file in /hadoop/mapred/history/processing/ 20 | # listing the jobFiles to be loaded and create corresponding process Record. 21 | # 22 | # Load all the jobFiles listed in the process file from the process record into HBase 23 | # 24 | # Script Use: 25 | # 1. Set below parameters to correct values for execution environment 26 | # 2. Run using "./hraven-etl.sh" 27 | # 28 | 29 | # Parameters 30 | ########## FILL IN APPROPRIATE VALUES BELOW ########## 31 | cluster="mycluster" #Name of your cluster (arbitrary) 32 | mapredmaxsplitsize="204800" 33 | batchsize="100" #default is 1, which is bad for mapred job 34 | schedulerpoolname="mypool" #name of scheduler pool (arbitrary) 35 | threads="20" 36 | defaultrawfilesizelimit="524288000" 37 | machinetype="mymachine" #name of machine (arbitrary) 38 | 39 | #conf directories 40 | hadoopconfdir=${HADOOP_CONF_DIR:-$HADOOP_HOME/conf} 41 | hbaseconfdir=${HBASE_CONF_DIR:-$HBASE_HOME/conf} 42 | # HDFS directories for processing and loading job history data 43 | historyRawDir=/yarn/history/done/ 44 | historyProcessingDir=/hraven/processing/ 45 | ####################################################### 46 | 47 | 48 | source $(dirname $0)/hraven-etl-env.sh 49 | 50 | batchsizejobs=$(($batchsize / 2)) 51 | myscriptname=$(basename "$0" .sh) 52 | stopfile=$HRAVEN_PID_DIR/$myscriptname.stop 53 | 54 | if [ -f $stopfile ]; then 55 | echo "Error: not allowed to run. Remove $stopfile continue." 
1>&2 56 | exit 1 57 | fi 58 | 59 | create_pidfile $HRAVEN_PID_DIR 60 | trap 'cleanup_pidfile_and_exit $HRAVEN_PID_DIR' INT TERM EXIT 61 | 62 | # Pre-process 63 | $home/jobFilePreprocessor.sh $hadoopconfdir $historyRawDir $historyProcessingDir $cluster $batchsize $defaultrawfilesizelimit 64 | 65 | # Load 66 | $home/jobFileLoader.sh $hadoopconfdir $mapredmaxsplitsize $schedulerpoolname $cluster $historyProcessingDir 67 | 68 | # Process 69 | $home/jobFileProcessor.sh $hbaseconfdir $schedulerpoolname $historyProcessingDir $cluster $threads $batchsize $machinetype 70 | -------------------------------------------------------------------------------- /bin/etl/jobFileLoader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Run on the daemon node for the specific cluster. 19 | # Usage ./jobFileLoader.sh [hadoopconfdir] 20 | # [maxsplitsize] [schedulerpoolname] [cluster] [historyprocessingdir] 21 | 22 | if [ $# -ne 5 ] 23 | then 24 | echo "Usage: `basename $0` [hadoopconfdir] [maxsplitsize] [schedulerpoolname] [cluster] [historyprocessingdir]" 25 | exit 1 26 | fi 27 | 28 | source $(dirname $0)/hraven-etl-env.sh 29 | 30 | myscriptname=$(basename "$0" .sh) 31 | stopfile=$HRAVEN_PID_DIR/$myscriptname.stop 32 | 33 | if [ -f $stopfile ]; then 34 | echo "Error: not allowed to run. 
Remove $stopfile continue." 1>&2 35 | exit 1 36 | fi 37 | 38 | create_pidfile $HRAVEN_PID_DIR 39 | trap 'cleanup_pidfile_and_exit $HRAVEN_PID_DIR' INT TERM EXIT 40 | 41 | hadoop --config $1 jar $hravenEtlJar com.twitter.hraven.etl.JobFileRawLoader -libjars=$LIBJARS -Dmapred.max.split.size=$2 -Dmapred.fairscheduler.pool=$3 -d -c $4 -p $5 42 | -------------------------------------------------------------------------------- /bin/etl/jobFilePreprocessor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Run on the daemon node per specific cluster 19 | # Usage ./jobFilePreprocessor.sh [hadoopconfdir] 20 | # [historyrawdir] [historyprocessingdir] [cluster] [batchsize] 21 | 22 | if [ $# -ne 6 ] 23 | then 24 | echo "Usage: `basename $0` [hadoopconfdir] [historyrawdir] [historyprocessingdir] [cluster] [batchsize] [defaultrawfilesizelimit]" 25 | exit 1 26 | fi 27 | 28 | source $(dirname $0)/hraven-etl-env.sh 29 | 30 | export HADOOP_HEAPSIZE=4000 31 | myscriptname=$(basename "$0" .sh) 32 | stopfile=$HRAVEN_PID_DIR/$myscriptname.stop 33 | 34 | if [ -f $stopfile ]; then 35 | echo "Error: not allowed to run. Remove $stopfile continue." 
1>&2 36 | exit 1 37 | fi 38 | 39 | create_pidfile $HRAVEN_PID_DIR 40 | trap 'cleanup_pidfile_and_exit $HRAVEN_PID_DIR' INT TERM EXIT 41 | 42 | hadoop --config $1 jar $hravenEtlJar com.twitter.hraven.etl.JobFilePreprocessor -libjars=$LIBJARS -d -i $2 -o $3 -c $4 -b $5 -s $6 -------------------------------------------------------------------------------- /bin/etl/jobFileProcessor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Run on the daemon node per specific cluster 19 | # This script runs on the HBase cluster 20 | # Usage ./jobFileProcessor.sh [hadoopconfdir] 21 | # [schedulerpoolname] [historyprocessingdir] [cluster] [threads] [batchsize] [machinetype] [costfile] 22 | # a sample cost file can be found in the conf dir as sampleCostDetails.properties 23 | 24 | if [ $# -ne 7 ] 25 | then 26 | echo "Usage: `basename $0` [hbaseconfdir] [schedulerpoolname] [historyprocessingdir] [cluster] [threads] [batchsize] [machinetype]" 27 | exit 1 28 | fi 29 | 30 | source $(dirname $0)/hraven-etl-env.sh 31 | 32 | myscriptname=$(basename "$0" .sh) 33 | stopfile=$HRAVEN_PID_DIR/$myscriptname.stop 34 | 35 | if [ -f $stopfile ]; then 36 | echo "Error: not allowed to run. Remove $stopfile continue." 
1>&2 37 | exit 1 38 | fi 39 | 40 | create_pidfile $HRAVEN_PID_DIR 41 | trap 'cleanup_pidfile_and_exit $HRAVEN_PID_DIR' INT TERM EXIT 42 | 43 | hadoop --config $1 jar $hravenEtlJar com.twitter.hraven.etl.JobFileProcessor -libjars=$LIBJARS -Dmapred.fairscheduler.pool=$2 -d -p $3 -c $4 -t $5 -b $6 -m $7 44 | 45 | -------------------------------------------------------------------------------- /bin/etl/pidfiles.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # This script does not do anything by itself, but includes functions for dealing with PID file locks. 19 | # 20 | # Usage: 21 | ## Pull in functions to manage pid files. 22 | # source $(dirname $0)/pidfiles.sh 23 | ## Race to get the lock 24 | # create_pidfile 25 | ## Make sure we clean up when done (even when killed, except with -9): 26 | # trap 'cleanup_pidfile_and_exit' INT TERM EXIT 27 | 28 | # 29 | # Create the process file or exit if the previous process is still running. 30 | # Will also exit if we cannot write the pidfile, or delete a previously abandoned one. 31 | # In case of a previously abandoned pidfile, we re-launch ourselves in the background. 
32 | # 33 | function create_pidfile() { 34 | mypid=$$ 35 | myscriptname=$(basename "$0" .sh) 36 | pidfile=$1/$myscriptname.pid 37 | # Close stderr so no garbage goes into pidfile, 38 | # then write mypid atomically into the PID file, or fail to write if already there 39 | $(exec 2>&-; set -o noclobber; echo "$mypid" > "$pidfile") 40 | # Check if the lockfile is present 41 | if [ ! -f "$pidfile" ]; then 42 | # pidfile should exist; if not, we failed to create it, so bail out 43 | echo pidFile does not exist, exiting 44 | exit 1 45 | fi 46 | # Read the pid from the file 47 | currentpid=$(<"$pidfile") 48 | # Is the recorded pid me? 49 | if [ $mypid -ne $currentpid ]; then 50 | # It is not me. Is the process pid in the lockfile still running? 51 | is_already_running "$pidfile" 52 | if [ $? -ne 0 ]; then 53 | # No. Kill the pidfile and relaunch ourselves properly. 54 | rm "$pidfile" 55 | if [ $? -ne 0 ]; then 56 | echo "Error: unable to delete pidfile $pidfile" 1>&2 57 | else 58 | # fork only if we can delete the pidfile to prevent fork-bomb 59 | $0 $@ & 60 | fi 61 | fi 62 | # We did not own the pid in the pidfile. 63 | exit 64 | fi 65 | } 66 | 67 | # 68 | # Clean up the pidfile that we owned and exit 69 | # After creating the pidfile, call this as (passing the pid directory, which is read as $1): 70 | # trap 'cleanup_pidfile_and_exit $HRAVEN_PID_DIR' INT TERM EXIT 71 | # 72 | function cleanup_pidfile_and_exit() { 73 | myscriptname=$(basename "$0" .sh) 74 | pidfile=$1/$myscriptname.pid 75 | if [ -f "$pidfile" ]; then 76 | rm "$pidfile" 77 | if [ $? -ne 0 ]; then 78 | echo "Error: unable to delete pidfile $pidfile" 1>&2 79 | fi 80 | fi 81 | exit 82 | } 83 | 84 | # 85 | # For internal use only 86 | # 87 | # param: the pidfile 88 | # returns 0 if the recorded process is still running, 1 otherwise (shell convention: 0=yes, 1=no) 89 | # 90 | function is_already_running() { 91 | pidfile="$1" 92 | if [ ! 
-f "$pidfile" ]; then 93 | # pid file does not exist 94 | return 1 95 | fi 96 | pid=$(<"$pidfile") 97 | if [ -z "$pid" ]; then 98 | # pid file did not contain a pid 99 | return 1 100 | fi 101 | 102 | # check if a process with this pid exists and is an instance of this script 103 | previous=$(ps -p $pid | grep $(basename $0)) 104 | if [ "$previous" = "" ]; then 105 | # There is no such process running, or the pid is not us 106 | return 1 107 | else 108 | return 0 109 | fi 110 | } -------------------------------------------------------------------------------- /bin/etl/processingRecordsPrinter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # Used to just dump out the process records in a readable form for a given cluster. 19 | # Used for manual debugging and verification. 
# Used to just dump out the process records in a readable form for a given
# cluster. Used for manual debugging and verification.
#
# Run on the daemon node per specific cluster.
# Usage: ./processingRecordsPrinter.sh [hbaseconfdir] [cluster]

if [ $# -ne 2 ]; then
  # $(...) and quoting replace the original backticks and unquoted expansions.
  echo "Usage: $(basename "$0") [hbaseconfdir] [cluster]"
  exit 1
fi

home=$(dirname "$0")
source "$home/../../conf/hraven-env.sh"
hravenEtlJar=$home/../../lib/hraven-etl.jar
LIBJARS=$home/../../lib/hraven-core.jar

# Print all processing records for cluster $2, using the hbase conf dir $1.
hadoop --config "$1" jar "$hravenEtlJar" com.twitter.hraven.etl.ProcessingRecordsPrinter -libjars=$LIBJARS -c "$2"
#
# Prints out incomplete records from the dev.job_history table.
# Incomplete records have no 'jobid' column, indicating that the
# job history file has not been loaded.
#

include Java

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.util.Bytes

import com.twitter.hraven.Constants
import com.twitter.hraven.datasource.JobKeyConverter


conf = HBaseConfiguration.create()
history_table = HTable.new(conf, Constants.HISTORY_TABLE_BYTES)
jobid_col = Bytes.toBytes("jobid")

# By filtering to return only empty job IDs, and _not_ setting
# filterIfMissing, we should only get rows missing the jobid column.
scan = Scan.new
scan.setFilter(SingleColumnValueFilter.new(Constants::INFO_FAM_BYTES, jobid_col, CompareFilter::CompareOp::EQUAL, Constants::EMPTY_BYTES))

key_converter = JobKeyConverter.new
matched = 0

history_table.getScanner(scan).each do |result|
  break if result.nil? || result.isEmpty
  matched += 1
  raw_key = result.getRow()
  jobid = Bytes.toString(result.getValue(Constants::INFO_FAM_BYTES, jobid_col))
  puts "#{Bytes.toStringBinary(raw_key)}\t#{key_converter.fromBytes(raw_key)}\t#{jobid}"
end

puts "Found #{matched} matching jobs"
#
# Prints out incomplete records from the raw job history table.
# Incomplete records are missing the raw job configuration and/or the
# raw job history blob, indicating the files have not been loaded.
#

include Java

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.HTable
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.FilterList
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.util.Bytes

import com.twitter.hraven.Constants
import com.twitter.hraven.datasource.QualifiedJobIdConverter


conf = HBaseConfiguration.create()
raw_table = HTable.new(conf, Constants.HISTORY_RAW_TABLE_BYTES)

# By filtering to return only an empty job conf or history column, and _not_
# setting filterIfMissing, we should only get rows missing those raw fields.
# MUST_PASS_ONE makes the list an OR of the two column checks.
empty_conf = SingleColumnValueFilter.new(Constants::RAW_FAM_BYTES, Constants::JOBCONF_COL_BYTES, CompareFilter::CompareOp::EQUAL, Constants::EMPTY_BYTES)
empty_history = SingleColumnValueFilter.new(Constants::RAW_FAM_BYTES, Constants::JOBHISTORY_COL_BYTES, CompareFilter::CompareOp::EQUAL, Constants::EMPTY_BYTES)
filters = FilterList.new(FilterList::Operator::MUST_PASS_ONE)
filters.addFilter(empty_conf)
filters.addFilter(empty_history)

scan = Scan.new
scan.setFilter(filters)

id_converter = QualifiedJobIdConverter.new
matched = 0

raw_table.getScanner(scan).each do |result|
  break if result.nil? || result.isEmpty
  matched += 1
  row = result.getRow()
  puts "#{Bytes.toStringBinary(row)}\t#{id_converter.fromBytes(row)}"
end

puts "Found #{matched} matching jobs"
#
# Outputs the most recent flow for the given user and appId
#
require 'optparse'
include Java

import java.util.Date
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.util.Bytes
import com.twitter.hraven.datasource.JobHistoryService
import com.twitter.hraven.datasource.JobKeyConverter
import com.twitter.hraven.rest.ObjectMapperProvider

options = { :tasks => false, :limit => 1, :revision => nil, :json => false }
OptionParser.new do |opts|
  opts.banner = "Usage: ./get_flow.rb [options] cluster user app"

  opts.on("-t", "--tasks", "Include task data") { |t| options[:tasks] = t }
  opts.on("-l", "--limit N", Integer, "Return up to N flows (defaults to 1)") { |n| options[:limit] = n }
  opts.on("-r", "--revision [REV]", "Only match the given application version") { |r| options[:revision] = r }
  opts.on("-j", "--json", "Print retrieved flow in JSON format") { |j| options[:json] = j }
end.parse!

# Serialize the retrieved flows with hRaven's custom Jackson mapper.
def print_json(flows)
  mapper = ObjectMapperProvider.createCustomMapper
  puts mapper.writeValueAsString(flows)
end

# Human-readable dump of each flow and the jobs it contains.
def print_text(flows)
  key_converter = JobKeyConverter.new
  flows.each_with_index do |flow, flow_idx|
    puts "Flow #{flow_idx + 1}: #{flow.getAppId()}, run by #{flow.getUserName()} at #{Date.new(flow.getRunId())} (#{flow.getRunId}), #{flow.getJobs().size()} jobs"
    puts
    flow.getJobs().each_with_index do |job, job_idx|
      puts "Job #{job_idx + 1}: #{job.getJobId()} #{job.getJobName()} #{job.getStatus()}"
      puts "\tkey: #{Bytes.toStringBinary(key_converter.toBytes(job.getJobKey()))}"
      puts "\tsubmitted: #{job.getSubmitDate()} launched: #{job.getLaunchDate()} finished: #{job.getFinishDate()} runtime: #{job.getRunTime()} ms"
      puts "\tmaps: #{job.getTotalMaps()} (#{job.getFinishedMaps()} finished / #{job.getFailedMaps()} failed)"
      puts "\treduces: #{job.getTotalReduces()} (#{job.getFinishedReduces()} finished / #{job.getFailedReduces()} failed)"
      puts
    end
  end
end

cluster = ARGV[0]
user = ARGV[1]
app = ARGV[2]

service = JobHistoryService.new(HBaseConfiguration.create())
flows = service.getFlowSeries(cluster, user, app, options[:revision], options[:tasks], options[:limit])
service.close()

if flows.nil?
  puts "No flows found for user: #{user}, app: #{app}"
elsif options[:json]
  print_json(flows)
else
  print_text(flows)
end
#
# Outputs the flow events for the given flow (cluster, user, app,
# runtimestamp), optionally polling for new events with -f.
#
require 'optparse'
include Java

import java.text.SimpleDateFormat
import java.util.Date
import org.apache.hadoop.hbase.HBaseConfiguration
import com.twitter.hraven.datasource.FlowEventService
import com.twitter.hraven.FlowKey

options = {}
options[:follow] = false
options[:sleepdelay] = 5
OptionParser.new do |opts|
  opts.banner = "Usage: ./get_flow_events.rb [options] cluster user app runtimestamp"

  opts.on("-f", "--follow", "Poll for new events") do |t|
    options[:follow] = t
  end
  opts.on("-s", "--sleep N", Integer, "Wait N seconds between attempts when polling (defaults to 5)") do |n|
    # Bug fix: this previously assigned options[:delay], which nothing read,
    # so -s was silently ignored and the poll loop always slept 5 seconds.
    options[:sleepdelay] = n
  end

end.parse!

DF = SimpleDateFormat.new("MM-dd-yyyy HH:mm:ss")

# Print one line per event: sequence, formatted timestamp, type, JSON payload.
def show_events(events)
  events.each{ |e|
    eventTime = DF.format(e.getTimestamp())
    puts "#{e.getFlowEventKey().getSequence()}: #{eventTime} type=#{e.getType()} data=#{e.getEventDataJSON()}"
  }
end

conf = HBaseConfiguration.create
service = FlowEventService.new(conf)

cluster = ARGV[0]
user = ARGV[1]
app = ARGV[2]
runts = ARGV[3]

fk = FlowKey.new(cluster, user, app, runts.to_i)

begin
  events = service.getFlowEvents(fk)
  show_events(events)

  if options[:follow]
    last_e = nil
    while true
      sleep options[:sleepdelay]
      puts "..."
      # continue from the last event seen, if any
      if events.size() > 0
        last_e = events.get(events.size() - 1)
      end
      if !last_e.nil?
        events = service.getFlowEventsSince(last_e.getFlowEventKey())
      else
        # no events seen yet; re-read from the start of the flow
        events = service.getFlowEvents(fk)
      end
      show_events(events)
    end
  end
ensure
  service.close() unless service.nil?
end
#
# Outputs time-series flow stats for the given user and appId
#
require 'optparse'
include Java

import java.util.Date
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.util.Bytes
import com.twitter.hraven.datasource.JobHistoryService
import com.twitter.hraven.datasource.JobKeyConverter
import com.twitter.hraven.rest.ObjectMapperProvider

options = {}
options[:limit] = 1
options[:revision] = nil
options[:json] = false
OptionParser.new do |opts|
  # Bug fix: the banner previously named the wrong script (get_flow.rb).
  opts.banner = "Usage: ./get_flow_stats.rb [options] cluster user app"

  opts.on("-d", "--detail", "Include job details") do |d|
    # Bug fix: this previously assigned the undefined constant `True`,
    # which raised a NameError whenever -d/--detail was passed.
    options[:detail] = d
  end
  opts.on("-l", "--limit N", Integer, "Return up to N flows (defaults to 1)") do |n|
    options[:limit] = n
  end
  opts.on("-r", "--revision [REV]", "Only match the given application version") do |r|
    options[:revision] = r
  end
  opts.on("-j", "--json", "Print retrieved flow in JSON format") do |j|
    options[:json] = j
  end
  opts.on("-s", "--start [TIME]", Integer, "Start time (in millis since epoch)") do |s|
    options[:starttime] = s
  end
  opts.on("-e", "--end [TIME]", Integer, "End time (in millis since epoch)") do |e|
    options[:endtime] = e
  end
end.parse!

# Serialize the retrieved flows with hRaven's custom Jackson mapper.
def print_json(flows)
  mapper = ObjectMapperProvider.createCustomMapper
  flows_json = mapper.writeValueAsString(flows)
  puts flows_json
end

# Human-readable dump of each flow; per-job detail only when includeJobs.
def print_text(flows, includeJobs)
  keyConv = JobKeyConverter.new
  flowcnt = 0
  flows.each { |flow|
    flowcnt += 1
    puts "Flow #{flowcnt}: #{flow.getAppId()}, run by #{flow.getUserName()} at #{Date.new(flow.getRunId())} (#{flow.getRunId}), #{flow.getJobs().size()} jobs"
    puts
    if includeJobs
      jobcnt = 0
      flow.getJobs().each { |job|
        jobcnt += 1
        puts "Job #{jobcnt}: #{job.getJobId()} #{job.getJobName()} #{job.getStatus()}"
        puts "\tkey: #{Bytes.toStringBinary(keyConv.toBytes(job.getJobKey()))}"
        puts "\tsubmitted: #{job.getSubmitDate()} launched: #{job.getLaunchDate()} finished: #{job.getFinishDate()} runtime: #{job.getRunTime()} ms"
        puts "\tmaps: #{job.getTotalMaps()} (#{job.getFinishedMaps()} finished / #{job.getFailedMaps()} failed)"
        puts "\treduces: #{job.getTotalReduces()} (#{job.getFinishedReduces()} finished / #{job.getFailedReduces()} failed)"
        puts
      }
    end
  }
end

cluster = ARGV[0]
user = ARGV[1]
app = ARGV[2]

conf = HBaseConfiguration.create()
#conf.set('hbase.client.scanner.caching', '1000')
service = JobHistoryService.new(conf)

flows = service.getFlowTimeSeriesStats(cluster, user, app, options[:revision], options[:starttime], options[:endtime], options[:limit], nil)
service.close()

if flows.nil?
  puts "No flows found for user: #{user}, app: #{app}"
else
  if options[:json]
    print_json(flows)
  else
    print_text(flows, options[:detail])
  end
end
47 | puts "No job found for cluster: #{cluster}, jobid: #{jobid}" 48 | else 49 | puts "Job: #{job.getJobId()} #{job.getJobName()} #{job.getStatus()}" 50 | puts "\tsubmitted: #{job.getSubmitDate()} launched: #{job.getLaunchDate()} finished: #{job.getFinishDate()} runtime: #{job.getRunTime()} ms" 51 | puts "\tmaps: #{job.getTotalMaps()} (#{job.getFinishedMaps()} finished / #{job.getFailedMaps()} failed)" 52 | puts "\treduces: #{job.getTotalReduces()} (#{job.getFinishedReduces()} finished / #{job.getFailedReduces()} failed)" 53 | if options[:tasks] 54 | puts "Tasks:" 55 | job.getTasks().each { |task| 56 | puts "\t#{task.getTaskId()}: #{task.getTaskAttemptId()} type: #{task.getType()} status: #{task.getStatus()}" 57 | } 58 | end 59 | end 60 | 61 | 62 | -------------------------------------------------------------------------------- /bin/get_raw.rb: -------------------------------------------------------------------------------- 1 | #!./hraven org.jruby.Main 2 | # 3 | # Copyright 2013 Twitter, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
#
# Outputs a single job's raw configuration or raw history
#
require 'optparse'
include Java

import java.lang.System
import org.apache.hadoop.hbase.HBaseConfiguration
import com.twitter.hraven.datasource.JobHistoryRawService
import com.twitter.hraven.QualifiedJobId

options = {}

OptionParser.new do |opts|
  opts.banner = "Usage: ./get_raw.rb [options] cluster jobId"

  opts.on("-t", "--type TYPE", [:conf, :history], "Raw field to output (conf, history)") do |t|
    options[:type] = t
  end
  opts.on("-f", "--file [FILENAME]", "Write the raw field to the file FILENAME") do |f|
    options[:filename] = f
  end
end.parse!

cluster = ARGV[0]
jobid = ARGV[1]

qualifiedId = QualifiedJobId.new(cluster, jobid)

conf = HBaseConfiguration.create()
service = JobHistoryRawService.new(conf)

begin
  if options[:type] == :conf
    rawConf = service.getRawJobConfiguration(qualifiedId)
    if rawConf.nil?
      puts "No job configuration found for #{qualifiedId}"
      exit 1
    end
    # Bug fix: -f/--file was parsed but never used; honor it here.
    if options[:filename]
      stream = java.io.FileOutputStream.new(options[:filename])
      begin
        rawConf.writeXml(stream)
      ensure
        stream.close
      end
    else
      rawConf.writeXml(System.out)
    end
  elsif options[:type] == :history
    rawHistory = service.getRawJobHistory(qualifiedId)
    if rawHistory.nil?
      puts "No job history found for #{qualifiedId}"
      exit 1
    end
    if options[:filename]
      File.open(options[:filename], "w") { |f| f.write(rawHistory) }
    else
      puts rawHistory
    end
  else
    # Previously a missing/invalid -t silently produced no output at all.
    $stderr.puts "Error: --type must be one of: conf, history"
    exit 1
  end
ensure
  # Bug fix: the early exit-1 paths previously leaked the service connection.
  service.close()
end
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # Used to configure hRaven environment 18 | 19 | # JAVA_HOME The java implementation to use. Overrides JAVA_HOME. 20 | # export JAVA_HOME= 21 | 22 | # HBASE_CONF_DIR Alternate directory from which to pick up hbase configurations. Default is ${HBASE_HOME}/conf. 23 | # All other hbase configurations can be set in the standard hbase manner, or supplied here instead. 24 | # export HBASE_CONF_DIR= 25 | 26 | # HADOOP_CONF_DIR Alternate directory from which to pick up hadoop configurations. Default is ${HADOOP_HOME}/conf. 27 | # All other hadoop configurations can be set in the standard hadoop manner, or supplied here instead. 28 | # export HADOOP_CONF_DIR= 29 | 30 | # HBASE_CLASSPATH Used in hraven-etl-env.sh 31 | export HBASE_CLASSPATH=`hbase classpath` 32 | 33 | # export HRAVEN_CLASSPATH=$HBASE_CLASSPATH 34 | # export HRAVEN_CLASSPATH=`hbase --config /etc/hbase/conf-hbase-tst-dc1 classpath` 35 | export HRAVEN_CLASSPATH= 36 | 37 | # The maximum amount of heap to use, in MB. Default is 1000. 38 | # export HRAVEN_HEAPSIZE=1000 39 | 40 | # Location for process ID files for any hRaven daemons 41 | export HRAVEN_PID_DIR=/tmp/ 42 | -------------------------------------------------------------------------------- /conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # Define some default values that can be overridden by system properties 2 | # from other start scripts 3 | hraven.root.logger=WARN,console 4 | hraven.log.dir=. 
5 | hraven.log.file=hraven.log 6 | 7 | # Define the root logger to the system property "hraven.root.logger". 8 | log4j.rootLogger=${hraven.root.logger} 9 | 10 | # Logging Threshold 11 | log4j.threshold=ALL 12 | 13 | # 14 | # Daily Rolling File Appender 15 | # 16 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 17 | log4j.appender.DRFA.File=${hraven.log.dir}/${hraven.log.file} 18 | 19 | # Rollver at midnight 20 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 21 | 22 | # 30-day backup 23 | #log4j.appender.DRFA.MaxBackupIndex=30 24 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 25 | 26 | # Pattern format: Date LogLevel LoggerName LogMessage 27 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 28 | 29 | # 30 | # console 31 | # Add "console" to rootlogger above if you want to use this 32 | # 33 | log4j.appender.console=org.apache.log4j.ConsoleAppender 34 | log4j.appender.console.target=System.err 35 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 36 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 37 | 38 | # Custom Logging levels 39 | 40 | log4j.logger.com.twitter.hraven=INFO 41 | -------------------------------------------------------------------------------- /conf/sampleCostDetails.properties: -------------------------------------------------------------------------------- 1 | # properties file to store cost data 2 | # used in calculating cost of a job in the processing step 3 | # 4 | # machine type is the hardware name of node that the job runs on 5 | # 6 | # compute cost is the part of dollar amount of total cost of operating a machine 7 | # allocated to compute 8 | # 9 | # machinememory is the max amount of memory at run time in 10 | # megabytes available to a hadoop job 11 | # 12 | default.computecost=10 13 | default.machinememory=24576 14 | # 15 | -------------------------------------------------------------------------------- /hraven-assembly/pom.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 15 | 16 | 4.0.0 17 | 18 | com.twitter.hraven 19 | hraven 20 | 1.0.2-SNAPSHOT 21 | ../ 22 | 23 | hraven-assembly 24 | hRaven-assembly 25 | hRaven - Assembly artifacts 26 | pom 27 | 28 | 29 | 30 | 31 | maven-assembly-plugin 32 | 2.3 33 | 34 | hraven-${project.version} 35 | false 36 | gnu 37 | true 38 | false 39 | -Xmx1024m 40 | 41 | src/main/assembly/all.xml 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /hraven-assembly/src/main/assembly/all.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 24 | 27 | all 28 | 29 | tar.gz 30 | 31 | 32 | 33 | 34 | ${project.basedir}/.. 35 | . 36 | 37 | *.txt 38 | 39 | 0644 40 | 41 | 42 | 43 | ${project.basedir}/../conf 44 | conf 45 | 0644 46 | 0755 47 | 48 | 49 | 50 | ${project.basedir}/../bin 51 | bin 52 | 0755 53 | 0755 54 | 55 | 56 | 57 | ${project.basedir}/../hraven-core/src/main/scripts 58 | scripts 59 | 0644 60 | 0755 61 | 62 | 63 | 64 | 65 | true 66 | 67 | lib 68 | false 69 | 70 | 71 | runtime 72 | 0644 73 | 0644 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/AppAggregationKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package com.twitter.hraven; 18 | 19 | /** 20 | * Represents the row key that stores Aggregations for an app 21 | */ 22 | public class AppAggregationKey extends AppKey { 23 | 24 | /** 25 | * timestamp stored as part of row key in aggregation table 26 | * this is usually top of the day or top of the week timestamp 27 | * All apps that belong to that day (or that week for weekly aggregations) 28 | * have the same aggregation id 29 | * 30 | * If a {@link Flow} (like a pig job or a scalding job) spans more than a 1 day, 31 | * the aggregationId is the day that the first job in that Flow started running, 32 | * which is the submitTime or runId of that {@link Flow} 33 | */ 34 | private long aggregationId; 35 | 36 | public AppAggregationKey(String cluster, String userName, String appId, Long ts) { 37 | super(cluster, userName, appId); 38 | this.setAggregationId(ts); 39 | } 40 | 41 | public long getAggregationId() { 42 | return aggregationId; 43 | } 44 | 45 | public void setAggregationId(long aggregationId) { 46 | this.aggregationId = aggregationId; 47 | } 48 | 49 | /** 50 | * Encodes the given timestamp for ordering by run ID 51 | */ 52 | public static long encodeAggregationId(long timestamp) { 53 | return Long.MAX_VALUE - timestamp; 54 | } 55 | 56 | /** 57 | * Inverted version of {@link AppAggregationKey#getaggregationId()} used in the byte representation for 58 | * reverse chronological sorting. 59 | * @return 60 | */ 61 | public long getEncodedAggregationId() { 62 | return encodeAggregationId(aggregationId); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/AppKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Twitter, Inc. 
package com.twitter.hraven;

import org.apache.commons.lang.builder.CompareToBuilder;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.codehaus.jackson.annotate.JsonCreator;
import org.codehaus.jackson.annotate.JsonProperty;

/**
 * Key identifying an application: the cluster it ran on, the user who ran it,
 * and the application identifier. Base class for more specific keys such as
 * {@link FlowKey}.
 */
public class AppKey implements Comparable {

  /**
   * The cluster on which the application ran
   */
  protected final String cluster;
  /**
   * Who ran the application on Hadoop
   */
  protected final String userName;

  /**
   * The thing that identifies an application,
   * such as Pig script identifier, or Scalding identifier.
   */
  protected final String appId;

  /**
   * Jackson-deserializable constructor.
   * Null userName/appId are replaced with {@link Constants#UNKNOWN} and
   * non-null values are trimmed; cluster is stored as-is (may be null).
   */
  @JsonCreator
  public AppKey(@JsonProperty("cluster") String cluster, @JsonProperty("userName") String userName,
      @JsonProperty("appId") String appId) {
    this.cluster = cluster;
    this.userName = (null == userName) ? Constants.UNKNOWN : userName.trim();
    this.appId = (null == appId) ? Constants.UNKNOWN : appId.trim();
  }

  /**
   * @return The cluster on which the job ran.
   */
  public String getCluster() {
    return cluster;
  }

  /**
   * @return Who ran the application
   */
  public String getUserName() {
    return userName;
  }

  /**
   * @return The thing that identifies an application, such as Pig script
   *         identifier, or Scalding identifier.
   */
  public String getAppId() {
    return appId;
  }

  // Serialized key form: cluster!user!appId (separator from Constants.SEP).
  public String toString() {
    return getCluster() + Constants.SEP + getUserName() + Constants.SEP + getAppId();
  }
  /**
   * Compares two AppKey objects on the basis of their cluster, userName and
   * appId (in that order of significance).
   * @param other
   * @return 0 if this key's cluster, userName and appId equal the other's;
   *         a negative value if this key orders before {@code other};
   *         a positive value if this key orders after {@code other}.
   *         NOTE(review): a null argument returns -1 instead of throwing
   *         NullPointerException, which deviates from the Comparable
   *         contract; a non-AppKey argument throws ClassCastException.
   */
  @Override
  public int compareTo(Object other) {
    if (other == null) {
      return -1;
    }
    AppKey otherKey = (AppKey) other;
    return new CompareToBuilder()
        .append(this.cluster, otherKey.getCluster())
        .append(this.userName, otherKey.getUserName())
        .append(this.appId, otherKey.getAppId())
        .toComparison();
  }

  // equals is defined in terms of compareTo so the two always agree.
  @Override
  public boolean equals(Object other) {
    if (other instanceof AppKey) {
      return compareTo((AppKey) other) == 0;
    }
    return false;
  }

  // Hashes the same three fields compareTo/equals use, preserving the
  // equals/hashCode contract.
  @Override
  public int hashCode() {
    return new HashCodeBuilder()
        .append(this.cluster)
        .append(this.userName)
        .append(this.appId)
        .toHashCode();
  }

}
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | import java.io.IOException; 19 | import java.io.InputStream; 20 | import java.util.HashMap; 21 | import java.util.Map; 22 | import java.util.Properties; 23 | import java.util.Set; 24 | 25 | import org.apache.commons.lang.StringUtils; 26 | import org.apache.commons.logging.Log; 27 | import org.apache.commons.logging.LogFactory; 28 | 29 | public class Cluster { 30 | private static Map CLUSTERS_BY_HOST = new HashMap(); 31 | private static Log LOG = LogFactory.getLog(Cluster.class); 32 | 33 | public static String getIdentifier(String hostname) { 34 | return CLUSTERS_BY_HOST.get(hostname); 35 | } 36 | 37 | static { 38 | loadHadoopClustersProps(null); 39 | } 40 | 41 | // package level visibility to enable 42 | // testing with different properties file names 43 | static void loadHadoopClustersProps(String filename) { 44 | // read the property file 45 | // populate the map 46 | Properties prop = new Properties(); 47 | if (StringUtils.isBlank(filename)) { 48 | filename = Constants.HRAVEN_CLUSTER_PROPERTIES_FILENAME; 49 | } 50 | try { 51 | //TODO : property file to be moved out from resources into config dir 52 | InputStream inp = Cluster.class.getResourceAsStream("/" + filename); 53 | if (inp == null) { 54 | LOG.error(filename 55 | + " for mapping clusters to cluster identifiers in hRaven does not exist"); 56 | return; 57 | } 58 | prop.load(inp); 59 | Set 
hostnames = prop.stringPropertyNames(); 60 | for (String h : hostnames) { 61 | CLUSTERS_BY_HOST.put(h, prop.getProperty(h)); 62 | } 63 | } catch (IOException e) { 64 | // An ExceptionInInitializerError will be thrown to indicate that an 65 | // exception occurred during evaluation of a static initializer or the 66 | // initializer for a static variable. 67 | throw new ExceptionInInitializerError(" Could not load properties file " + filename 68 | + " for mapping clusters to cluster identifiers in hRaven"); 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/Counter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | /** 19 | * Represents a single counter entry, which may be associated with a job, 20 | * task, or task attempt. 
21 | */ 22 | public class Counter { 23 | private String group; 24 | private String key; 25 | private long value; 26 | 27 | public Counter(String group, String key, long value) { 28 | this.group = group; 29 | this.key = key; 30 | this.value = value; 31 | } 32 | 33 | public String getGroup() { 34 | return group; 35 | } 36 | 37 | public String getKey() { 38 | return key; 39 | } 40 | 41 | public long getValue() { 42 | return value; 43 | } 44 | 45 | @Override 46 | public boolean equals(Object other) { 47 | if (other instanceof Counter) { 48 | Counter otherCounter = (Counter)other; 49 | return this.group.equals(otherCounter.group) && 50 | this.key.equals(otherCounter.key) && 51 | this.value == otherCounter.value; 52 | } 53 | return false; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/CounterMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven; 17 | 18 | import java.util.HashMap; 19 | import java.util.Iterator; 20 | import java.util.Map; 21 | import java.util.NoSuchElementException; 22 | import java.util.Set; 23 | 24 | import org.codehaus.jackson.map.annotate.JsonSerialize; 25 | 26 | @JsonSerialize( 27 | include=JsonSerialize.Inclusion.NON_NULL 28 | ) 29 | public class CounterMap implements Iterable { 30 | private final Map> internalMap = new HashMap>(); 31 | 32 | public Set getGroups() { 33 | return internalMap.keySet(); 34 | } 35 | 36 | public Map getGroup(String group) { 37 | return internalMap.get(group); 38 | } 39 | 40 | public Counter getCounter(String group, String name) { 41 | Map groupCounters = getGroup(group); 42 | if (groupCounters != null) { 43 | return groupCounters.get(name); 44 | } 45 | 46 | return null; 47 | } 48 | 49 | public Counter add(Counter counter) { 50 | Map groupCounters = internalMap.get(counter.getGroup()); 51 | if (groupCounters == null) { 52 | groupCounters = new HashMap(); 53 | internalMap.put(counter.getGroup(), groupCounters); 54 | } 55 | return groupCounters.put(counter.getKey(), counter); 56 | } 57 | 58 | public void addAll(Iterable counters) { 59 | if (counters != null) { 60 | for (Counter c : counters) { 61 | add(c); 62 | } 63 | } 64 | } 65 | 66 | /** 67 | * Returns an iterator over all the contained {@link Counter} instances for all groups. 68 | * Note that the {@code remove()} operation is not supported in the returned 69 | * {@code Iterator}. 
70 | * @return 71 | */ 72 | @Override 73 | public Iterator iterator() { 74 | return new Iterator() { 75 | private Iterator>> groupIter = 76 | internalMap.entrySet().iterator(); 77 | private Iterator> currentGroupIter = null; 78 | 79 | @Override 80 | public boolean hasNext() { 81 | // advance current group if necessary 82 | if ((currentGroupIter == null || !currentGroupIter.hasNext()) && groupIter.hasNext()) { 83 | currentGroupIter = groupIter.next().getValue().entrySet().iterator(); 84 | } 85 | return currentGroupIter != null && currentGroupIter.hasNext(); 86 | } 87 | 88 | @Override 89 | public Counter next() { 90 | if (!hasNext()) { 91 | throw new NoSuchElementException("No more elements in iterator"); 92 | } 93 | return currentGroupIter.next().getValue(); 94 | } 95 | 96 | @Override 97 | public void remove() { 98 | throw new UnsupportedOperationException("remove() is not supported by CounterMap"); 99 | } 100 | }; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/FlowEvent.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven; 17 | 18 | /** 19 | * Represents an event generated during flow execution 20 | */ 21 | public class FlowEvent { 22 | private FlowEventKey key; 23 | private long timestamp; 24 | private Framework framework; 25 | private String type; 26 | private String eventDataJSON; 27 | 28 | public FlowEvent(FlowEventKey key) { 29 | this.key = key; 30 | } 31 | 32 | public FlowEventKey getFlowEventKey() { 33 | return this.key; 34 | } 35 | 36 | public long getTimestamp() { 37 | return timestamp; 38 | } 39 | 40 | public void setTimestamp(long timestamp) { 41 | this.timestamp = timestamp; 42 | } 43 | 44 | public Framework getFramework() { 45 | return framework; 46 | } 47 | 48 | public void setFramework(Framework framework) { 49 | this.framework = framework; 50 | } 51 | 52 | public String getType() { 53 | return type; 54 | } 55 | 56 | public void setType(String type) { 57 | this.type = type; 58 | } 59 | 60 | public String getEventDataJSON() { 61 | return eventDataJSON; 62 | } 63 | 64 | public void setEventDataJSON(String eventDataJSON) { 65 | this.eventDataJSON = eventDataJSON; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/FlowEventKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven; 17 | 18 | /** 19 | * Key class representing rows in the {@link Constants#FLOW_EVENT_TABLE} table. 20 | */ 21 | public class FlowEventKey extends FlowKey { 22 | private int sequence; 23 | 24 | public FlowEventKey(FlowKey flowKey, int sequence) { 25 | super(flowKey); 26 | this.sequence = sequence; 27 | } 28 | 29 | public FlowEventKey(String cluster, String user, String appId, long runId, int sequence) { 30 | super(cluster, user, appId, runId); 31 | this.sequence = sequence; 32 | } 33 | 34 | public int getSequence() { 35 | return this.sequence; 36 | } 37 | 38 | @Override 39 | public boolean equals(Object other) { 40 | if (other == null || !(other instanceof FlowEventKey)) { 41 | return false; 42 | } 43 | FlowEventKey otherKey = (FlowEventKey)other; 44 | return super.equals(other) && this.sequence == otherKey.sequence; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/FlowKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
package com.twitter.hraven;

import org.apache.commons.lang.builder.CompareToBuilder;
import org.apache.commons.lang.builder.HashCodeBuilder;
import org.codehaus.jackson.annotate.JsonCreator;
import org.codehaus.jackson.annotate.JsonProperty;

/**
 * Key identifying a single run of an application: the {@link AppKey}
 * components plus a run timestamp.
 */
public class FlowKey extends AppKey implements Comparable {

  /**
   * Identifies one single run of a version of an app.
   * NOTE(review): this field holds the plain submit-time timestamp
   * (milliseconds since January 1, 1970 UTC). The inverted form
   * (Long.MAX_VALUE - timestampMillis), in which smaller values indicate a
   * later run, is produced by {@link #getEncodedRunId()} for the byte
   * representation — the original comment here described the inverted form.
   */
  protected long runId;

  @JsonCreator
  public FlowKey(@JsonProperty("cluster") String cluster,
      @JsonProperty("userName") String userName,
      @JsonProperty("appId") String appId,
      @JsonProperty("runId") long runId) {
    super(cluster, userName, appId);
    this.runId = runId;
  }

  /** Copy constructor. */
  public FlowKey(FlowKey toCopy) {
    this(toCopy.getCluster(), toCopy.getUserName(), toCopy.getAppId(), toCopy.getRunId());
  }

  /**
   * Inverted version of {@link FlowKey#getRunId()}
   * used in the byte representation for reverse chronological sorting.
   * @return Long.MAX_VALUE - runId
   */
  public long getEncodedRunId() {
    return encodeRunId(runId);
  }

  /**
   * Encodes the given timestamp for ordering by run ID:
   * an inverted timestamp, so later runs get smaller encoded values.
   */
  public static long encodeRunId(long timestamp) {
    return Long.MAX_VALUE - timestamp;
  }

  /**
   * @return the run timestamp identifying one single run of a version of an
   *         app.
   */
  public long getRunId() {
    return runId;
  }

  // Serialized key form: cluster!user!appId!runId.
  public String toString() {
    return super.toString() + Constants.SEP + this.getRunId();
  }

  /**
   * Compares two FlowKey objects on the basis of
   * their cluster, userName, appId and encodedRunId.
   * Because the encoded (inverted) run id is compared, keys with the same
   * cluster/user/app order in reverse chronological order of runId.
   *
   * @param other
   * @return 0 if cluster, userName, appId and encodedRunId are all equal;
   *         a negative value if this key orders before {@code other};
   *         a positive value if this key orders after {@code other}.
   *         NOTE(review): a null argument returns -1 rather than throwing
   *         NullPointerException (deviates from the Comparable contract),
   *         and a non-FlowKey argument throws ClassCastException.
   */
  @Override
  public int compareTo(Object other) {
    if (other == null) {
      return -1;
    }
    FlowKey otherKey = (FlowKey)other;
    return new CompareToBuilder()
        .appendSuper(super.compareTo(other))
        .append(getEncodedRunId(), otherKey.getEncodedRunId())
        .toComparison();
  }

  // equals is defined in terms of compareTo so the two always agree.
  @Override
  public boolean equals(Object other) {
    if (other instanceof FlowKey) {
      return compareTo((FlowKey)other) == 0;
    }
    return false;
  }

  // Hashes the same fields the comparison uses (including the encoded run
  // id), preserving the equals/hashCode contract.
  @Override
  public int hashCode(){
    return new HashCodeBuilder()
        .appendSuper(super.hashCode())
        .append(getEncodedRunId())
        .toHashCode();
  }

}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | import org.apache.commons.lang.builder.EqualsBuilder; 19 | import org.apache.commons.lang.builder.ToStringBuilder; 20 | 21 | /** 22 | * Represents the row key for an entry in the flow_queue table. flow_queue rows are keyed by: 23 | * - cluster 24 | * - status code 25 | * - inverted timestamp 26 | * - unique ID 27 | */ 28 | public class FlowQueueKey { 29 | private final String cluster; 30 | private final Flow.Status status; 31 | private final long timestamp; 32 | private final String flowId; 33 | 34 | public FlowQueueKey(String cluster, Flow.Status status, long timestamp, String flowId) { 35 | this.cluster = cluster; 36 | this.status = status; 37 | this.timestamp = timestamp; 38 | this.flowId = flowId; 39 | } 40 | 41 | public String getCluster() { 42 | return cluster; 43 | } 44 | 45 | public Flow.Status getStatus() { 46 | return status; 47 | } 48 | 49 | public long getTimestamp() { 50 | return timestamp; 51 | } 52 | 53 | public String getFlowId() { 54 | return flowId; 55 | } 56 | 57 | @Override 58 | public boolean equals(Object other) { 59 | if (other == null || !(other instanceof FlowQueueKey)) { 60 | return false; 61 | } 62 | FlowQueueKey otherKey = (FlowQueueKey)other; 63 | return new EqualsBuilder().append(this.cluster, otherKey.cluster) 64 | .append(this.status, otherKey.status) 65 | .append(this.timestamp, otherKey.timestamp) 66 | .append(this.flowId, otherKey.flowId) 67 | .isEquals(); 68 | } 69 | 70 | public String toString() { 71 | return new ToStringBuilder(this) 72 | 
.append(this.cluster) 73 | .append(this.status) 74 | .append(this.timestamp) 75 | .append(this.flowId) 76 | .toString(); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/Framework.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | /** 19 | * Used to distinguish the framework used to launch the map-reduce job with. 20 | */ 21 | public enum Framework { 22 | 23 | /** 24 | * Identifies Pig applications/ pig scripts 25 | */ 26 | PIG("p", "pig"), 27 | /** 28 | * 29 | */ 30 | SCALDING("s", "scalding"), 31 | /** 32 | * 33 | */ 34 | NONE("n", "none, plain map-reduce"); 35 | 36 | // TODO: Add Hive as a framework and at least recognize those jobs as such. 
37 | 38 | /** 39 | * The code representing this application type as used in the {@link JobDesc} 40 | */ 41 | private final String code; 42 | 43 | /** 44 | * The description for this {@link Framework} 45 | */ 46 | private final String description; 47 | 48 | /** 49 | * Constructor 50 | * 51 | * @param code 52 | * for this type 53 | * @param description 54 | * for this type 55 | */ 56 | private Framework(String code, String description) { 57 | this.code = code; 58 | this.description = description; 59 | } 60 | 61 | /** 62 | * @return the code corresponding to this type. 63 | */ 64 | public String getCode() { 65 | return code; 66 | } 67 | 68 | /** 69 | * @return the description for this type. 70 | */ 71 | public String getDescription() { 72 | return description; 73 | } 74 | 75 | /** 76 | * Get the {@link Framework} corresponding to this code, or none if not 77 | * specifically Pig or Scalding 78 | * 79 | * @param code 80 | */ 81 | public static Framework get(String code) { 82 | 83 | for (Framework framework : Framework.values()) { 84 | if (framework.getCode().equals(code)) { 85 | return framework; 86 | } 87 | } 88 | return NONE; 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/HadoopVersion.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * hadoop versions
 */
public enum HadoopVersion {
  ONE, TWO
};

// ---- HravenResponseMetrics.java ----

import java.util.concurrent.atomic.AtomicLong;

import com.google.common.util.concurrent.AtomicDouble;
import com.twitter.common.stats.Stats;

/**
 * Defines the latency/size metrics collected for the hRaven REST API.
 *
 * Each *_LATENCY constant is the stat name, and the matching *_VALUE field
 * holds the exported accumulator created by Stats.exportLong/exportDouble in
 * the static initializer below.
 * NOTE(review): Stats.exportLong/exportDouble presumably register the value
 * with the twitter-commons Stats registry — confirm against
 * com.twitter.common.stats.Stats.
 * NOTE(review): the *_VALUE fields are public, mutable statics assigned once
 * at class load; consider making them final (initialized inline) so callers
 * cannot reassign them.
 */
public class HravenResponseMetrics {
  public final static String JOB_API_LATENCY = "JOB_API_LATENCY";
  public static AtomicLong JOB_API_LATENCY_VALUE;

  public final static String FLOW_API_LATENCY = "FLOW_API_LATENCY";
  public static AtomicLong FLOW_API_LATENCY_VALUE;

  public final static String FLOW_STATS_API_LATENCY = "FLOW_STATS_API_LATENCY";
  public static AtomicLong FLOW_STATS_API_LATENCY_VALUE;

  public final static String FLOW_VERSION_API_LATENCY = "FLOW_VERSION_API_LATENCY";
  public static AtomicLong FLOW_VERSION_API_LATENCY_VALUE;

  public final static String FLOW_HBASE_RESULT_SIZE = "FLOW_HBASE_RESULT_SIZE";
  public static AtomicDouble FLOW_HBASE_RESULT_SIZE_VALUE;

  public final static String JOBFLOW_API_LATENCY = "JOBFLOW_API_LATENCY";
  public static AtomicLong JOBFLOW_API_LATENCY_VALUE;

  public final static String TASKS_API_LATENCY = "TASKS_API_LATENCY";
  public static AtomicLong TASKS_API_LATENCY_VALUE;

  public final static String APPVERSIONS_API_LATENCY = "APPVERSIONS_API_LATENCY";
  public static AtomicLong APPVERSIONS_API_LATENCY_VALUE;

  public final static String HDFS_STATS_API_LATENCY = "HDFS_STATS_API_LATENCY";
  public static AtomicLong HDFS_STATS_API_LATENCY_VALUE;

  public final static String HDFS_TIMESERIES_API_LATENCY = "HDFS_TIMESERIES_API_LATENCY";
  public static AtomicLong HDFS_TIMESERIES_API_LATENCY_VALUE;

  public final static String NEW_JOBS_API_LATENCY = "NEW_JOBS_API_LATENCY";
  public static AtomicLong NEW_JOBS_API_LATENCY_VALUE;

  static {
    // initialize (export) all metrics once at class load
    JOB_API_LATENCY_VALUE = Stats.exportLong(JOB_API_LATENCY);
    FLOW_API_LATENCY_VALUE = Stats.exportLong(FLOW_API_LATENCY);
    FLOW_STATS_API_LATENCY_VALUE = Stats.exportLong(FLOW_STATS_API_LATENCY);
    FLOW_VERSION_API_LATENCY_VALUE = Stats.exportLong(FLOW_VERSION_API_LATENCY);
    FLOW_HBASE_RESULT_SIZE_VALUE = Stats.exportDouble(FLOW_HBASE_RESULT_SIZE);
    JOBFLOW_API_LATENCY_VALUE = Stats.exportLong(JOBFLOW_API_LATENCY);
    TASKS_API_LATENCY_VALUE = Stats.exportLong(TASKS_API_LATENCY);
    APPVERSIONS_API_LATENCY_VALUE = Stats.exportLong(APPVERSIONS_API_LATENCY);
    HDFS_STATS_API_LATENCY_VALUE = Stats.exportLong(HDFS_STATS_API_LATENCY);
    HDFS_TIMESERIES_API_LATENCY_VALUE = Stats.exportLong(HDFS_TIMESERIES_API_LATENCY);
    NEW_JOBS_API_LATENCY_VALUE = Stats.exportLong(NEW_JOBS_API_LATENCY);

  }
}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | import org.apache.hadoop.conf.Configuration; 19 | 20 | /** 21 | * Used to create {@link JobKey} instances that can deal with 22 | * {@link Configuration} file (contents) for {@link Framework#NONE} 23 | * 24 | */ 25 | public class MRJobDescFactory extends JobDescFactoryBase { 26 | 27 | /* 28 | * (non-Javadoc) 29 | * 30 | * @see 31 | * com.twitter.hraven.JobKeyFactoryBase#create(com.twitter.corestorage 32 | * .rhaven.QualifiedJobId, long, org.apache.hadoop.conf.Configuration) 33 | */ 34 | @Override 35 | JobDesc create(QualifiedJobId qualifiedJobId, long submitTimeMillis, 36 | Configuration jobConf) { 37 | // TODO: Get the actual values appropriate for the plain Hadoop jobs. 
38 | 39 | String appId = getAppId(jobConf); 40 | 41 | long appSubmitTimeMillis = jobConf.getLong(Constants.MR_RUN_CONF_KEY, 42 | submitTimeMillis); 43 | 44 | 45 | return create(qualifiedJobId, jobConf, appId, Constants.UNKNOWN, 46 | Framework.NONE, appSubmitTimeMillis); 47 | } 48 | 49 | /* 50 | * (non-Javadoc) 51 | * 52 | * @see 53 | * com.twitter.hraven.JobDescFactoryBase#getAppIdFromJobName(java.lang.String) 54 | */ 55 | @Override 56 | String getAppIdFromJobName(String jobName) { 57 | int firstOpenBracketPos = jobName.indexOf("["); 58 | if (firstOpenBracketPos > -1) { 59 | return jobName.substring(0, firstOpenBracketPos); 60 | } 61 | return jobName; 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/PigJobDescFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven; 17 | 18 | import java.util.regex.Matcher; 19 | import java.util.regex.Pattern; 20 | 21 | import org.apache.hadoop.conf.Configuration; 22 | 23 | /** 24 | * Used to {@link JobKey} instances that can deal with {@link Configuration} 25 | * file (contents) for {@link Framework#PIG} 26 | */ 27 | public class PigJobDescFactory extends JobDescFactoryBase { 28 | 29 | private static Pattern scheduledJobnamePattern = Pattern 30 | .compile(Constants.PIG_SCHEDULED_JOBNAME_PATTERN_REGEX); 31 | private static Pattern pigLogfilePattern = Pattern 32 | .compile(Constants.PIG_LOGFILE_PATTERN_REGEX); 33 | 34 | // TODO: Make this configurable 35 | public static final String SCHEDULED_PREFIX = "oink "; 36 | 37 | /* 38 | * (non-Javadoc) 39 | * 40 | * @see 41 | * com.twitter.hraven.JobKeyFactoryBase#create(com.twitter.corestorage 42 | * .rhaven.QualifiedJobId, long, org.apache.hadoop.conf.Configuration) 43 | */ 44 | @Override 45 | public JobDesc create(QualifiedJobId qualifiedJobId, long submitTimeMillis, 46 | Configuration jobConf) { 47 | String appId = getAppId(jobConf); 48 | String version = jobConf.get(Constants.PIG_VERSION_CONF_KEY, 49 | Constants.UNKNOWN); 50 | long pigSubmitTimeMillis = jobConf.getLong(Constants.PIG_RUN_CONF_KEY, 0); 51 | 52 | // This means that Constants.PIG_RUN_CONF_KEY was not present (for jobs 53 | // launched with an older pig version). 54 | if (pigSubmitTimeMillis == 0) { 55 | String pigLogfile = jobConf.get(Constants.PIG_LOG_FILE_CONF_KEY); 56 | if (pigLogfile == null) { 57 | // Should be rare, but we're seeing this happen occasionally 58 | // Give up on grouping the jobs within the run together, and treat these as individual runs. 
59 | pigSubmitTimeMillis = submitTimeMillis; 60 | } else { 61 | pigSubmitTimeMillis = getScriptStartTimeFromLogfileName(pigLogfile); 62 | } 63 | } 64 | 65 | return create(qualifiedJobId, jobConf, appId, version, Framework.PIG, 66 | pigSubmitTimeMillis); 67 | } 68 | 69 | /* 70 | * (non-Javadoc) 71 | * 72 | * @see 73 | * com.twitter.hraven.JobDescFactoryBase#getAppIdFromJobName(java.lang.String) 74 | */ 75 | @Override 76 | String getAppIdFromJobName(String jobName) { 77 | if (jobName == null) { 78 | return null; 79 | } 80 | 81 | Matcher matcher = scheduledJobnamePattern.matcher(jobName); 82 | 83 | // TODO: Externalize patterns to make them configurable 84 | if (matcher.matches()) { 85 | jobName = SCHEDULED_PREFIX + matcher.group(1); 86 | } 87 | 88 | return jobName; 89 | } 90 | 91 | /** 92 | * @param pigLogfile 93 | * as obtained from the JobConfig 94 | * @return 95 | */ 96 | public static long getScriptStartTimeFromLogfileName(String pigLogfile) { 97 | long pigSubmitTimeMillis = 0; 98 | 99 | if (pigLogfile == null) { 100 | return pigSubmitTimeMillis; 101 | } 102 | 103 | Matcher matcher = pigLogfilePattern.matcher(pigLogfile); 104 | if (matcher.matches()) { 105 | String submitTimeMillisString = matcher.group(1); 106 | pigSubmitTimeMillis = Long.parseLong(submitTimeMillisString); 107 | } 108 | return pigSubmitTimeMillis; 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/QualifiedJobId.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | import org.codehaus.jackson.annotate.JsonCreator; 19 | import org.codehaus.jackson.annotate.JsonProperty; 20 | 21 | /** 22 | * The job ID should be relatively unique, unless two clusters start at the same 23 | * time. However, given a jobId it is not immediately clear which cluster a job 24 | * ran on (unless the cluster has not been restarted and the prefix is still the 25 | * current one). This class represents the fully qualified job identifier. 26 | * 27 | */ 28 | public class QualifiedJobId extends JobId { 29 | 30 | /** 31 | * The Hadoop cluster on which the job ran. 32 | */ 33 | private final String cluster; 34 | 35 | /** 36 | * Constructor. 37 | * 38 | * @param cluster 39 | * @param jobId 40 | */ 41 | @JsonCreator 42 | public QualifiedJobId(@JsonProperty("cluster") String cluster, 43 | @JsonProperty("jobId") String jobId) { 44 | super(jobId); 45 | this.cluster = (cluster != null ? cluster.trim() : ""); 46 | } 47 | 48 | public QualifiedJobId(String cluster, JobId jobId) { 49 | super(jobId); 50 | this.cluster = (cluster != null ? cluster.trim() : ""); 51 | } 52 | 53 | /** 54 | * @return The Hadoop cluster on which the job ran. 55 | */ 56 | public String getCluster() { 57 | return cluster; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/Range.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | /** 19 | * A range of (sorted) items with a min and a max. 20 | */ 21 | public class Range { 22 | 23 | /** 24 | * The minimum item of class {@link E} in this range. 25 | */ 26 | private final E min; 27 | 28 | /** 29 | * The maximum item of class {@link E} in this range. 30 | */ 31 | private final E max; 32 | 33 | /** 34 | * Constructs a range 35 | * 36 | * @param min 37 | * the minimum of this range 38 | * @param max 39 | * the maximum of this range 40 | */ 41 | public Range(E min, E max) { 42 | this.min = min; 43 | this.max = max; 44 | } 45 | 46 | /** 47 | * @return the min of the range 48 | */ 49 | public E getMin() { 50 | return min; 51 | } 52 | 53 | /** 54 | * @return the max of the range. 55 | */ 56 | public E getMax() { 57 | return max; 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/TaskKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | import org.apache.commons.lang.builder.CompareToBuilder; 19 | import org.apache.commons.lang.builder.HashCodeBuilder; 20 | import org.codehaus.jackson.annotate.JsonCreator; 21 | import org.codehaus.jackson.annotate.JsonProperty; 22 | import org.codehaus.jackson.map.annotate.JsonSerialize; 23 | 24 | 25 | /** 26 | * Represents the row key for an individual job task. This key shares all the 27 | * same components from the job key, with the additional of the task ID: 28 | *
29 |  *   (m|r)_tasknumber(_attemptnumber)?
30 |  * 
31 | */ 32 | @JsonSerialize( 33 | include=JsonSerialize.Inclusion.NON_NULL 34 | ) 35 | public class TaskKey extends JobKey implements Comparable { 36 | private String taskId; 37 | 38 | @JsonCreator 39 | public TaskKey(@JsonProperty("jobId") JobKey jobKey, @JsonProperty("taskId") String taskId) { 40 | super(jobKey.getQualifiedJobId(), jobKey.getUserName(), jobKey.getAppId(), 41 | jobKey.getRunId()); 42 | this.taskId = taskId; 43 | } 44 | 45 | public String getTaskId() { 46 | return this.taskId; 47 | } 48 | 49 | public String toString() { 50 | return super.toString() + Constants.SEP + getTaskId(); 51 | } 52 | 53 | /** 54 | * Compares two TaskKey objects on the basis of their taskId 55 | * 56 | * @param other 57 | * @return 0 if the taskIds are equal, 58 | * 1 if this taskId is greater than other taskId, 59 | * -1 if this taskId is less than other taskId 60 | */ 61 | @Override 62 | public int compareTo(Object other) { 63 | if (other == null) { 64 | return -1; 65 | } 66 | TaskKey otherKey = (TaskKey) other; 67 | return new CompareToBuilder().appendSuper(super.compareTo(otherKey)) 68 | .append(this.taskId, otherKey.getTaskId()) 69 | .toComparison(); 70 | } 71 | 72 | @Override 73 | public boolean equals(Object other) { 74 | if (other instanceof TaskKey) { 75 | return compareTo((TaskKey)other) == 0; 76 | } 77 | return false; 78 | } 79 | 80 | @Override 81 | public int hashCode(){ 82 | return new HashCodeBuilder().appendSuper(super.hashCode()) 83 | .append(this.taskId) 84 | .toHashCode(); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/AppAggregationKeyConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package com.twitter.hraven.datasource; 18 | 19 | import org.apache.hadoop.hbase.util.Bytes; 20 | 21 | import com.twitter.hraven.AppAggregationKey; 22 | import com.twitter.hraven.Constants; 23 | import com.twitter.hraven.util.ByteUtil; 24 | 25 | /** 26 | * To convert the row key into {@link AppAggregationKey} components 27 | * and vice versa 28 | * 29 | */ 30 | public class AppAggregationKeyConverter implements ByteConverter { 31 | 32 | @Override 33 | public byte[] toBytes(AppAggregationKey appAggKey) { 34 | if (appAggKey == null) { 35 | return Constants.EMPTY_BYTES; 36 | } else { 37 | return ByteUtil.join(Constants.SEP_BYTES, 38 | Bytes.toBytes(appAggKey.getCluster()), 39 | Bytes.toBytes(appAggKey.getEncodedAggregationId()), 40 | Bytes.toBytes(appAggKey.getUserName()), 41 | Bytes.toBytes(appAggKey.getAppId())); 42 | } 43 | } 44 | 45 | @Override 46 | public AppAggregationKey fromBytes(byte[] bytes) { 47 | return fromBytes(ByteUtil.split(bytes, Constants.SEP_BYTES, 4)); 48 | } 49 | 50 | public AppAggregationKey fromBytes(byte[][] splitBytes) { 51 | long runId = splitBytes.length > 1 ? Long.MAX_VALUE - Bytes.toLong(splitBytes[1]) : 0; 52 | return new AppAggregationKey( Bytes.toString(splitBytes[0]), 53 | splitBytes.length > 2 ? Bytes.toString(splitBytes[2]) : null, 54 | splitBytes.length > 3 ? 
Bytes.toString(splitBytes[3]) : null, 55 | runId); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/ByteConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | /** 19 | */ 20 | public interface ByteConverter { 21 | public byte[] toBytes(T object); 22 | public T fromBytes(byte[] bytes); 23 | } 24 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/DataException.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | /** 19 | * Base exception representing errors in data retrieval or storage. 20 | */ 21 | public class DataException extends Exception { 22 | 23 | private static final long serialVersionUID = 2406302267896675759L; 24 | 25 | public DataException(String message) { 26 | super(message); 27 | } 28 | 29 | public DataException(String message, Throwable cause) { 30 | super(message, cause); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/FlowEventKeyConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import com.twitter.hraven.Constants; 19 | import com.twitter.hraven.FlowEventKey; 20 | import com.twitter.hraven.util.ByteUtil; 21 | 22 | import org.apache.hadoop.hbase.util.Bytes; 23 | 24 | /** 25 | */ 26 | public class FlowEventKeyConverter implements ByteConverter { 27 | private FlowKeyConverter flowKeyConverter = new FlowKeyConverter(); 28 | 29 | @Override 30 | public byte[] toBytes(FlowEventKey key) { 31 | if (key == null) { 32 | return Constants.EMPTY_BYTES; 33 | } 34 | return ByteUtil.join(Constants.SEP_BYTES, flowKeyConverter.toBytes(key), 35 | Bytes.toBytes(key.getSequence())); 36 | } 37 | 38 | @Override 39 | public FlowEventKey fromBytes(byte[] bytes) { 40 | byte[][] splits = ByteUtil.split(bytes, Constants.SEP_BYTES, 4); 41 | byte[][] flowKeySplits = new byte[4][]; 42 | for (int i=0; i { 27 | 28 | @Override 29 | public byte[] toBytes(FlowKey flowKey) { 30 | if (flowKey == null) { 31 | return Constants.EMPTY_BYTES; 32 | } else { 33 | return ByteUtil.join(Constants.SEP_BYTES, 34 | Bytes.toBytes(flowKey.getCluster()), 35 | Bytes.toBytes(flowKey.getUserName()), 36 | Bytes.toBytes(flowKey.getAppId()), 37 | Bytes.toBytes(flowKey.getEncodedRunId())); 38 | } 39 | } 40 | 41 | @Override 42 | public FlowKey fromBytes(byte[] bytes) { 43 | return fromBytes(ByteUtil.split(bytes, Constants.SEP_BYTES, 4)); 44 | } 45 | 46 | public FlowKey fromBytes(byte[][] splitBytes) { 47 | long runId = splitBytes.length > 3 ? Long.MAX_VALUE - Bytes.toLong(splitBytes[3]) : 0; 48 | return new FlowKey( Bytes.toString(splitBytes[0]), 49 | splitBytes.length > 1 ? Bytes.toString(splitBytes[1]) : null, 50 | splitBytes.length > 2 ? 
Bytes.toString(splitBytes[2]) : null, 51 | runId); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/FlowQueueKeyConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import com.twitter.hraven.Constants; 19 | import com.twitter.hraven.Flow; 20 | import com.twitter.hraven.FlowQueueKey; 21 | import com.twitter.hraven.util.ByteUtil; 22 | 23 | import org.apache.hadoop.hbase.util.Bytes; 24 | 25 | /** 26 | * Handles serialization and deserialization of a {@link FlowQueueKey} to and from bytes. 27 | */ 28 | public class FlowQueueKeyConverter implements ByteConverter { 29 | @Override 30 | public byte[] toBytes(FlowQueueKey key) { 31 | if (key == null) { 32 | return Constants.EMPTY_BYTES; 33 | } 34 | long invertedTimestamp = Long.MAX_VALUE - key.getTimestamp(); 35 | return ByteUtil.join(Constants.SEP_BYTES, 36 | Bytes.toBytes(key.getCluster()), 37 | (key.getStatus() == null ? 
Constants.EMPTY_BYTES : key.getStatus().code()), 38 | Bytes.toBytes(invertedTimestamp), 39 | Bytes.toBytes(key.getFlowId())); 40 | } 41 | 42 | @Override 43 | public FlowQueueKey fromBytes(byte[] bytes) { 44 | if (bytes == null) { 45 | return null; 46 | } 47 | 48 | byte[][] firstSplit = ByteUtil.split(bytes, Constants.SEP_BYTES, 3); 49 | byte[] timestampBytes = null; 50 | byte[] flowIdBytes = null; 51 | if (firstSplit.length == 3) { 52 | int offset = 0; 53 | timestampBytes = ByteUtil.safeCopy(firstSplit[2], 0, 8); 54 | offset += 8+Constants.SEP_BYTES.length; 55 | flowIdBytes = ByteUtil.safeCopy(firstSplit[2], offset, firstSplit[2].length - offset); 56 | } 57 | 58 | return new FlowQueueKey(Bytes.toString(firstSplit[0]), 59 | firstSplit.length > 1 ? Flow.STATUS_BY_CODE.get(firstSplit[1]) : null, 60 | timestampBytes != null ? Long.MAX_VALUE - Bytes.toLong(timestampBytes) : 0, 61 | flowIdBytes != null ? Bytes.toString(flowIdBytes) : null); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/JobIdConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import org.apache.hadoop.hbase.util.Bytes; 19 | 20 | import com.twitter.hraven.JobId; 21 | 22 | /** 23 | */ 24 | public class JobIdConverter implements ByteConverter { 25 | @Override 26 | public byte[] toBytes(JobId jobId) { 27 | return Bytes.add(Bytes.toBytes(jobId.getJobEpoch()), 28 | Bytes.toBytes(jobId.getJobSequence())); 29 | } 30 | 31 | @Override 32 | public JobId fromBytes(byte[] bytes) { 33 | if (bytes == null || bytes.length < 16) { 34 | return null; 35 | } 36 | 37 | // expect a packed bytes encoding of [8 bytes epoch][8 bytes seq] 38 | long epoch = Bytes.toLong(bytes, 0); 39 | long seq = Bytes.toLong(bytes, 8); 40 | return new JobId(epoch, seq); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/MissingColumnInResultException.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import org.apache.hadoop.hbase.client.Result; 19 | import org.apache.hadoop.hbase.client.Scan; 20 | import org.apache.hadoop.hbase.util.Bytes; 21 | 22 | /** 23 | * Indicates that the {@link Result} from a {@link Scan} is missing an expected 24 | * column. 25 | *

26 | * Specifically, this exception indicates that the {@link Cell} returned by 27 | * {@link Result#getColumnLatest(byte[], byte[])} is null or the 28 | * list returned by {@link Result#getColumn(byte[], byte[]) is empty. 29 | */ 30 | public class MissingColumnInResultException extends Exception { 31 | 32 | private static final long serialVersionUID = 2561802650466866719L; 33 | 34 | 35 | private final byte [] family; 36 | private final byte [] qualifier; 37 | 38 | /** 39 | * Constructs an exception indicating that the specified column 40 | * @param family 41 | * @param qualifier 42 | */ 43 | public MissingColumnInResultException(byte [] family, byte [] qualifier) { 44 | super("Missing column: " + Bytes.toString(qualifier) + " from column family: " 45 | + Bytes.toString(family)); 46 | this.family = family; 47 | this.qualifier = qualifier; 48 | } 49 | 50 | /** 51 | * @return the family for which a column was missing. 52 | */ 53 | public byte[] getFamily() { 54 | return family; 55 | } 56 | 57 | /** 58 | * @return the qualifier indicating which column was missing. 59 | */ 60 | public byte[] getQualifier() { 61 | return qualifier; 62 | } 63 | 64 | 65 | 66 | } 67 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/ProcessingException.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | /** 19 | */ 20 | public class ProcessingException extends RuntimeException { 21 | 22 | private static final long serialVersionUID = -5606549071630261979L; 23 | 24 | public ProcessingException(String message) { 25 | super(message); 26 | } 27 | 28 | public ProcessingException(String message, Throwable cause) { 29 | super(message, cause); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/QualifiedJobIdConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import org.apache.hadoop.hbase.util.Bytes; 19 | 20 | import com.twitter.hraven.Constants; 21 | import com.twitter.hraven.JobId; 22 | import com.twitter.hraven.QualifiedJobId; 23 | import com.twitter.hraven.util.ByteUtil; 24 | 25 | /** 26 | */ 27 | public class QualifiedJobIdConverter implements ByteConverter { 28 | JobIdConverter jobIdConv = new JobIdConverter(); 29 | 30 | @Override 31 | public byte[] toBytes(QualifiedJobId id) { 32 | return ByteUtil.join(Constants.SEP_BYTES, 33 | Bytes.toBytes(id.getCluster()), 34 | jobIdConv.toBytes(id)); 35 | } 36 | 37 | @Override 38 | public QualifiedJobId fromBytes(byte[] bytes) { 39 | byte[][] parts = ByteUtil.split(bytes, Constants.SEP_BYTES, 2); 40 | if (parts.length != 2) { 41 | throw new IllegalArgumentException("Invalid encoded ID, must be 2 parts"); 42 | } 43 | String cluster = Bytes.toString(parts[0]); 44 | JobId jobId = jobIdConv.fromBytes(parts[1]); 45 | return new QualifiedJobId(cluster, jobId); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/RowKeyParseException.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | /** 19 | * This exception indicates that a row key could not be parsed successfully. 20 | */ 21 | public class RowKeyParseException extends Exception { 22 | 23 | private static final long serialVersionUID = 839389516279735249L; 24 | 25 | /** 26 | * @param message 27 | */ 28 | public RowKeyParseException(String message) { 29 | super(message); 30 | } 31 | 32 | /** 33 | * @param message 34 | * @param cause 35 | */ 36 | public RowKeyParseException(String message, Throwable cause) { 37 | super(message, cause); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/RunMatchFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import java.io.DataInput; 19 | import java.io.DataOutput; 20 | import java.io.IOException; 21 | import java.util.List; 22 | 23 | import org.apache.hadoop.hbase.filter.Filter; 24 | import org.apache.hadoop.hbase.filter.FilterBase; 25 | import org.apache.hadoop.hbase.util.Bytes; 26 | import org.apache.hadoop.hbase.Cell; 27 | 28 | import com.twitter.hraven.Constants; 29 | import com.twitter.hraven.util.ByteUtil; 30 | 31 | /** 32 | * Match up to N runs of a given app. 
Once N runs have been seen, we filter all 33 | * remaining rows. 34 | */ 35 | public class RunMatchFilter extends FilterBase { 36 | private byte[] appId; 37 | private int maxCount; 38 | private byte[] lastRunId = null; 39 | private int seenCount; 40 | 41 | /** 42 | * Match only a single run of the given appId 43 | * @param appId 44 | */ 45 | public RunMatchFilter(String appId) { 46 | this(appId, 1); 47 | } 48 | 49 | /** 50 | * Match up to maxCount runs of the given appId 51 | * 52 | * @param appId 53 | * @param maxCount 54 | */ 55 | public RunMatchFilter(String appId, int maxCount) { 56 | this.appId = Bytes.toBytes(appId); 57 | this.maxCount = maxCount; 58 | } 59 | 60 | @Override 61 | public void reset() { 62 | this.seenCount = 0; 63 | } 64 | 65 | @Override 66 | public boolean filterRowKey(byte[] buffer, int offset, int length) { 67 | // TODO: don't copy the byte[] 68 | byte[] rowkey = new byte[length]; 69 | System.arraycopy(buffer, offset, rowkey, 0, length); 70 | List splits = ByteUtil.splitRanges(rowkey, Constants.SEP_BYTES); 71 | if (splits.size() < 4) { 72 | // invalid row key 73 | return true; 74 | } 75 | ByteUtil.Range appRange = splits.get(1); 76 | int appCompare = Bytes.compareTo(appId, 0, appId.length, 77 | rowkey, appRange.start(), appRange.length()); 78 | if (appCompare != 0) { 79 | return false; 80 | } 81 | ByteUtil.Range runRange = splits.get(2); 82 | int runLength = runRange.length(); 83 | if (lastRunId == null || 84 | Bytes.compareTo(lastRunId, 0, lastRunId.length, 85 | rowkey, runRange.start(), runLength) != 0) { 86 | lastRunId = new byte[runLength]; 87 | System.arraycopy(rowkey, runRange.start(), lastRunId, 0, runLength); 88 | seenCount++; 89 | } 90 | 91 | return seenCount > maxCount; 92 | } 93 | 94 | @Override 95 | public boolean filterAllRemaining() { 96 | // once we've seen the limit number of runs, skip everything else 97 | return seenCount > maxCount; 98 | } 99 | 100 | // TODO dogpile days check override @Override 101 | public void 
write(DataOutput out) throws IOException { 102 | out.writeInt(appId.length); 103 | out.write(appId); 104 | out.writeInt(maxCount); 105 | } 106 | 107 | //TODO dogpile days check override @Override 108 | public void readFields(DataInput in) throws IOException { 109 | int appIdLength = in.readInt(); 110 | this.appId = new byte[appIdLength]; 111 | in.readFully(appId); 112 | this.maxCount = in.readInt(); 113 | } 114 | 115 | @Override 116 | public Filter.ReturnCode filterKeyValue(Cell cell) 117 | throws IOException { 118 | //TODO dogpiledays 119 | return Filter.ReturnCode.SKIP; 120 | } 121 | 122 | } 123 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/TaskKeyConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import org.apache.hadoop.hbase.util.Bytes; 19 | 20 | import com.twitter.hraven.Constants; 21 | import com.twitter.hraven.JobKey; 22 | import com.twitter.hraven.TaskKey; 23 | 24 | /** 25 | */ 26 | public class TaskKeyConverter implements ByteConverter { 27 | private JobKeyConverter jobKeyConv = new JobKeyConverter(); 28 | 29 | /** 30 | * Returns the bytes representation for a TaskKey. 
31 | * 32 | * @param taskKey 33 | * the TaskKey instance to serialize 34 | * @return the serialized representation of the TaskKey 35 | */ 36 | @Override 37 | public byte[] toBytes(TaskKey taskKey) { 38 | return Bytes.add(jobKeyConv.toBytes(taskKey), Constants.SEP_BYTES, 39 | Bytes.toBytes(taskKey.getTaskId())); 40 | } 41 | 42 | /** 43 | * Generates a TaskKey from the byte encoded format. 44 | * 45 | * @param bytes the serialized version of a task key 46 | * @return the deserialized TaskKey instance 47 | */ 48 | @Override 49 | public TaskKey fromBytes(byte[] bytes) { 50 | byte[][] keyComponents = JobKeyConverter.splitJobKey(bytes); 51 | JobKey jobKey = jobKeyConv.parseJobKey(keyComponents); 52 | return new TaskKey(jobKey, 53 | (keyComponents.length > 5 ? Bytes.toString(keyComponents[5]) : null)); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/datasource/VersionInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import org.apache.commons.lang.builder.HashCodeBuilder; 19 | 20 | public class VersionInfo implements Comparable { 21 | 22 | private String version; 23 | private long timestamp; 24 | 25 | public VersionInfo(String v, long ts) { 26 | this.version = v; 27 | this.timestamp = ts; 28 | } 29 | 30 | public String getVersion() { 31 | return version; 32 | } 33 | 34 | public void setVersion(String version) { 35 | this.version = version; 36 | } 37 | 38 | public long getTimestamp() { 39 | return timestamp; 40 | } 41 | 42 | public void setTimestamp(long timestamp) { 43 | this.timestamp = timestamp; 44 | } 45 | 46 | /** 47 | * Compares two VersionInfo timestamps to order them in reverse chronological 48 | * order 49 | * 50 | * @param other 51 | * @return 0 if timestamps are equal, 1 if this timestamp less than other 52 | * timestamp, -1 if this timestamp is greater than other timestamp 53 | * 54 | */ 55 | @Override 56 | public int compareTo(VersionInfo other) { 57 | if (this.timestamp == other.timestamp) { 58 | return 0; 59 | } 60 | if (this.timestamp < other.timestamp) { 61 | return 1; 62 | } 63 | return -1; 64 | } 65 | 66 | @Override 67 | public boolean equals(Object other) { 68 | if (other instanceof VersionInfo) { 69 | VersionInfo otherVersionInfo = (VersionInfo) other; 70 | return (this.timestamp == otherVersionInfo.timestamp) 71 | && (this.version.equals(otherVersionInfo.version)); 72 | } 73 | return false; 74 | } 75 | 76 | @Override 77 | public int hashCode(){ 78 | return new HashCodeBuilder() 79 | .append(this.timestamp) 80 | .append(this.version) 81 | .toHashCode(); 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/rest/HravenRestServer.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.rest; 17 | 18 | import java.util.Arrays; 19 | import java.util.Map; 20 | 21 | import javax.inject.Inject; 22 | 23 | import org.apache.commons.logging.Log; 24 | import org.apache.commons.logging.LogFactory; 25 | 26 | import com.google.common.collect.Maps; 27 | import com.google.inject.Module; 28 | import com.twitter.common.application.AbstractApplication; 29 | import com.twitter.common.application.modules.HttpModule; 30 | import com.twitter.common.application.modules.StatsModule; 31 | import com.twitter.common.application.AppLauncher; 32 | import com.twitter.common.application.Lifecycle; 33 | import com.twitter.common.application.modules.LogModule; 34 | 35 | import com.twitter.common.net.http.HttpServerDispatch; 36 | import com.twitter.common.stats.Stats; 37 | 38 | /** 39 | * This is the application that launches the REST API 40 | * It also exposes the metrics collected via the Stats System 41 | * at http://hostname:portnum/vars or 42 | * http://hostname:portnum/vars.json 43 | * These metrics can be collected from a plugin to be fed into 44 | * to any metric collection system 45 | */ 46 | public class HravenRestServer extends AbstractApplication { 47 | private static final Log LOG = LogFactory.getLog(HravenRestServer.class); 48 | 49 | @Inject private Lifecycle lifecycle; 50 | @Inject private HttpServerDispatch httpServer; 51 | 52 | @Override 53 | public void run() { 
54 | LOG.info("Running"); 55 | Map initParams = Maps.newHashMap(); 56 | initParams.put("com.sun.jersey.config.property.packages", "com.twitter.hraven.rest"); 57 | initParams.put("com.sun.jersey.api.json.POJOMappingFeature", "true"); 58 | 59 | httpServer.registerHandler("/", 60 | new com.sun.jersey.spi.container.servlet.ServletContainer(), initParams, false); 61 | 62 | // export a metric that printouts the epoch time this service came up 63 | // metrics can be viewed at hostname:portnumber/vars or 64 | // hostname:portnumber/vars.json 65 | Stats.exportLong("hravenRestService_StartTimestamp", System.currentTimeMillis()); 66 | 67 | // await shutdown 68 | lifecycle.awaitShutdown(); 69 | } 70 | 71 | /** 72 | * This tells AppLauncher what modules to load 73 | */ 74 | @Override 75 | public Iterable getModules() { 76 | return Arrays.asList( 77 | new HttpModule(), 78 | new LogModule(), 79 | new StatsModule() 80 | ); 81 | } 82 | 83 | public static void main(String[] args) { 84 | AppLauncher.launch(HravenRestServer.class, args); 85 | } 86 | } 87 | 88 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/rest/PaginatedResult.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
package com.twitter.hraven.rest;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Container class that maintains a set of results that can be used for
 * retrieving results in a paginated fashion.
 *
 * @param <T> the type of the values held in one page of results
 */
public class PaginatedResult<T> {

  // the start row for the next page of results;
  // null when no more results remain
  private byte[] nextStartRow;

  // the number of results to be returned per call
  private int limit;

  // request parameters & values
  private Map<String, String> requestParameters;

  // actual values that are to be returned
  private List<T> values;

  /**
   * Creates an empty result page with a limit of 0 and no next page.
   */
  public PaginatedResult() {
    // delegate so the initialization logic lives in exactly one place
    this(0);
  }

  /**
   * Creates an empty result page with the given per-call limit and no next
   * page.
   *
   * @param limit the number of results to be returned per call
   */
  public PaginatedResult(int limit) {
    values = new ArrayList<T>();
    requestParameters = new HashMap<String, String>();
    this.limit = limit;
    // a null next start row tells the UI that there is no next page
    this.setNextStartRow(null);
  }

  public List<T> getValues() {
    return values;
  }

  public void setValues(List<T> inputValues) {
    this.values = inputValues;
  }

  /** Appends a single value to this page of results. */
  public void addValue(T value) {
    this.values.add(value);
  }

  /** @return the start row for the next page, or null if there is none */
  public byte[] getNextStartRow() {
    return nextStartRow;
  }

  public void setNextStartRow(byte[] nextStartRow) {
    this.nextStartRow = nextStartRow;
  }

  public int getLimit() {
    return limit;
  }

  public void setLimit(int limit) {
    this.limit = limit;
  }

  public Map<String, String> getRequestParameters() {
    return requestParameters;
  }

  public void setRequestParameters(Map<String, String> requestParameters) {
    this.requestParameters = requestParameters;
  }

  /** Records one request parameter name/value pair. */
  public void addRequestParameter(String param, String value) {
    this.requestParameters.put(param, value);
  }
}
package com.twitter.hraven.rest.client;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.codehaus.jackson.type.TypeReference;

import com.twitter.hraven.util.JSONUtil;

/**
 * Fetches a JSON document over HTTP and deserializes it into a list of T.
 *
 * @param <T> element type of the deserialized list
 */
class UrlDataLoader<T> {

  private static final Log LOG = LogFactory.getLog(UrlDataLoader.class);

  private String endpointURL;
  private TypeReference typeRef;
  private int connectTimeout;
  private int readTimeout;
  private boolean useCompression;

  /**
   * Constructor, defaults to using compression (gzip / deflate).
   * @param endpointUrl URL to fetch
   * @param t TypeReference for json deserialization, should be
   *          TypeReference&lt;List&lt;T&gt;&gt;.
   * @param connectTimeout connect timeout in milliseconds
   * @param readTimeout socket read timeout in milliseconds
   * @throws java.io.IOException
   */
  public UrlDataLoader(String endpointUrl, TypeReference t, int connectTimeout, int readTimeout)
      throws IOException {
    this(endpointUrl, t, connectTimeout, readTimeout, true);
  }

  /**
   * Constructor.
   * @param endpointUrl URL to fetch
   * @param t TypeReference for json deserialization, should be
   *          TypeReference&lt;List&lt;T&gt;&gt;.
   * @param connectTimeout connect timeout in milliseconds
   * @param readTimeout socket read timeout in milliseconds
   * @param useCompression whether to request gzip/deflate content encoding
   * @throws java.io.IOException
   */
  public UrlDataLoader(String endpointUrl, TypeReference t, int connectTimeout, int readTimeout,
      boolean useCompression) throws IOException {
    this.endpointURL = endpointUrl;
    this.typeRef = t;
    this.connectTimeout = connectTimeout;
    this.readTimeout = readTimeout;
    this.useCompression = useCompression;
  }

  /**
   * Performs the HTTP GET and deserializes the response body.
   *
   * @return the deserialized list of T
   * @throws IOException on connection failure, timeout, or JSON parse error
   */
  @SuppressWarnings("unchecked")
  public List<T> load() throws IOException {
    InputStream input = null;

    RequestConfig requestConfig =
        RequestConfig.custom()
            .setConnectTimeout(connectTimeout)
            .setConnectionRequestTimeout(connectTimeout)
            .setSocketTimeout(readTimeout).build();
    HttpClientBuilder httpClientBuilder =
        HttpClientBuilder.create().setDefaultRequestConfig(requestConfig);

    if (! useCompression) {
      LOG.info("Not using compression!");
      httpClientBuilder.disableContentCompression();
    } else {
      LOG.debug("Using compression by default! Trying gzip, deflate");
    }

    CloseableHttpClient httpClient = httpClientBuilder.build();
    HttpGet httpGet = new HttpGet(endpointURL);
    HttpResponse response = httpClient.execute(httpGet);

    try {
      input = response.getEntity().getContent();
      return (List<T>) JSONUtil.readJson(input, typeRef);
    } finally {
      // close both the response stream and the client even on parse failure
      IOUtils.closeQuietly(input);
      IOUtils.closeQuietly(httpClient);
    }
  }
}
package com.twitter.hraven.util;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;

import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;

/**
 * An input stream class backed by a byte array that also implements
 * PositionedReadable, Seekable, and Closeable. It can be
 * used as an input to the FSDataInputStream.
 *
 * All state lives in the inherited ByteArrayInputStream fields
 * {@code buf}, {@code pos}, and {@code count}.
 *
 * @see org.apache.hadoop.fs.FSDataInputStream
 */
public class ByteArrayWrapper extends ByteArrayInputStream
    implements PositionedReadable, Seekable, Closeable {
  /**
   * Constructor that creates an instance of ByteArrayWrapper backed by the
   * given buffer (the buffer is not copied).
   */
  public ByteArrayWrapper(byte[] buf) {
    super(buf);
  }

  /**
   * Seeks and sets position to the specified value.
   *
   * NOTE(review): the bound rejects {@code position == count}, i.e. a seek
   * to exactly end-of-stream throws instead of positioning at EOF — confirm
   * no caller relies on seeking to EOF.
   *
   * @throws IOException if position is negative or exceeds the buffer size
   *
   * {@inheritDoc}
   */
  public synchronized void seek(long position) throws IOException {
    if (position < 0 || position >= count) {
      throw new IOException("cannot seek position " + position + " as it is out of bounds");
    }
    pos = (int) position;
  }

  /**
   * Returns the current read offset into the backing buffer.
   *
   * {@inheritDoc}
   */
  public synchronized long getPos() throws IOException {
    return pos;
  }

  /**
   * This is not applicable to ByteArrayWrapper, and always returns false
   * (there is only one "source" — the in-memory buffer).
   *
   * {@inheritDoc}
   */
  public boolean seekToNewSource(long targetPos) throws IOException {
    return false;
  }

  /**
   * Positional read: reads up to {@code length} bytes at {@code position}
   * without disturbing the stream's current offset — the original position
   * is restored in the finally block even if the read fails.
   *
   * @return number of bytes read, or -1 at end of stream
   *
   * {@inheritDoc}
   */
  public synchronized int read(long position, byte[] buffer, int offset, int length)
      throws IOException {
    long oldPos = getPos();
    int nread = -1;
    try {
      seek(position);
      nread = read(buffer, offset, length);
    } finally {
      seek(oldPos);
    }
    return nread;
  }

  /**
   * Reads exactly {@code length} bytes starting at {@code position} by
   * looping over the positional read above.
   *
   * @throws EOFException if the stream ends before {@code length} bytes are read
   *
   * {@inheritDoc}
   */
  public synchronized void readFully(long position, byte[] buffer, int offset, int length)
      throws IOException {
    int nread = 0;
    while (nread < length) {
      int nbytes = read(position + nread, buffer, offset + nread, length - nread);
      if (nbytes < 0) {
        throw new EOFException("End of file reached before reading fully.");
      }
      nread += nbytes;
    }
  }

  /**
   * Convenience overload: fills the whole buffer starting at {@code position}.
   *
   * {@inheritDoc}
   */
  public synchronized void readFully(long position, byte[] buffer) throws IOException {
    readFully(position, buffer, 0, buffer.length);
  }
}
package com.twitter.hraven.util;

import java.util.Calendar;
import java.util.GregorianCalendar;

/**
 * Date/time helpers used when bucketing statistics by month.
 */
public class DateUtil {
  /** Milliseconds in a nominal 30-day month. */
  public static final long MONTH_IN_MILLIS = 30L*24*60*60*1000;

  /**
   * Truncates a timestamp to the first instant of its calendar month
   * (day 1, 00:00:00.000) in the JVM's default time zone.
   *
   * @param baseTimestamp epoch milliseconds to truncate
   * @return the timestamp (in milliseconds) of the month start
   */
  public static long getMonthStart(long baseTimestamp) {
    Calendar month = new GregorianCalendar();
    month.setTimeInMillis(baseTimestamp);
    // reset day-of-month, hour, minute, and second in one call, then
    // clear the leftover milliseconds
    month.set(month.get(Calendar.YEAR), month.get(Calendar.MONTH), 1, 0, 0, 0);
    month.set(Calendar.MILLISECOND, 0);
    return month.getTimeInMillis();
  }
}
15 | */ 16 | 17 | package com.twitter.hraven.util; 18 | 19 | import org.apache.commons.lang.StringUtils; 20 | import org.apache.commons.logging.Log; 21 | import org.apache.commons.logging.LogFactory; 22 | import org.apache.hadoop.conf.Configuration; 23 | import com.twitter.hraven.Constants; 24 | 25 | /** 26 | * Utility class for accessing parameters from the Hadoop Conf 27 | * used in case of parameter name changes across hadoop versions 28 | */ 29 | public class HadoopConfUtil { 30 | 31 | private static Log LOG = LogFactory.getLog(HadoopConfUtil.class); 32 | 33 | /** 34 | * Get the user name from the job conf check for hadoop2 config param, then 35 | * hadoop1 36 | * 37 | * @param jobConf 38 | * @return userName 39 | * 40 | * @throws IllegalArgumentException 41 | */ 42 | public static String getUserNameInConf(Configuration jobConf) 43 | throws IllegalArgumentException { 44 | String userName = jobConf.get(Constants.USER_CONF_KEY_HADOOP2); 45 | if (StringUtils.isBlank(userName)) { 46 | userName = jobConf.get(Constants.USER_CONF_KEY); 47 | if (StringUtils.isBlank(userName)) { 48 | // neither user.name nor hadoop.mapreduce.job.user.name found 49 | throw new IllegalArgumentException(" Found neither " 50 | + Constants.USER_CONF_KEY + " nor " 51 | + Constants.USER_CONF_KEY_HADOOP2); 52 | } 53 | } 54 | return userName; 55 | } 56 | 57 | /** 58 | * checks if the jobConf contains a certain parameter 59 | * 60 | * @param jobConf 61 | * @param name 62 | * @return true if the job conf contains that parameter 63 | * false if the job conf does not contain that parameter 64 | */ 65 | public static boolean contains(Configuration jobConf, String name) { 66 | if (StringUtils.isNotBlank(jobConf.get(name))) { 67 | return true; 68 | } else { 69 | return false; 70 | } 71 | } 72 | 73 | /** 74 | * retrieves the queue name from a hadoop conf 75 | * looks for hadoop2 and hadoop1 settings 76 | * 77 | * @param jobConf 78 | * @return queuename 79 | */ 80 | public static String 
getQueueName(Configuration jobConf) { 81 | // look for the hadoop2 queuename first 82 | String hRavenQueueName = jobConf.get(Constants.QUEUENAME_HADOOP2); 83 | if (StringUtils.isBlank(hRavenQueueName)) { 84 | // presumably a hadoop1 conf, check for fair scheduler pool name 85 | hRavenQueueName = jobConf 86 | .get(Constants.FAIR_SCHEDULER_POOLNAME_HADOOP1); 87 | if (StringUtils.isBlank(hRavenQueueName)) { 88 | // check for capacity scheduler queue name 89 | hRavenQueueName = jobConf 90 | .get(Constants.CAPACITY_SCHEDULER_QUEUENAME_HADOOP1); 91 | if (StringUtils.isBlank(hRavenQueueName)) { 92 | // neither pool (hadoop1) nor queuename (hadoop2) found 93 | // presumably FIFO scheduler, hence set to "DEFAULT_QUEUE" 94 | hRavenQueueName = Constants.DEFAULT_QUEUENAME; 95 | LOG.info(" Found neither " 96 | + Constants.FAIR_SCHEDULER_POOLNAME_HADOOP1 97 | + " nor " + Constants.QUEUENAME_HADOOP2 + " nor " 98 | + Constants.CAPACITY_SCHEDULER_QUEUENAME_HADOOP1 99 | + " hence presuming FIFO scheduler " 100 | + " and setting the queuename to " 101 | + Constants.DEFAULT_QUEUENAME); 102 | } 103 | } 104 | } 105 | return hRavenQueueName; 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /hraven-core/src/main/java/com/twitter/hraven/util/JSONUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
package com.twitter.hraven.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.Writer;

import org.codehaus.jackson.map.DeserializationConfig;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;
import org.codehaus.jackson.type.TypeReference;

import com.twitter.hraven.ClientObjectMapper;
import com.twitter.hraven.rest.ObjectMapperProvider;

/**
 * Helper class used in the rest client.
 */
// TODO: Remove this class.
@Deprecated
public class JSONUtil {

  /**
   * Writes object to the writer as JSON using Jackson and adds a new-line before flushing.
   * @param writer the writer to write the JSON to
   * @param object the object to write as JSON
   * @throws IOException if the object can't be serialized as JSON or written to the writer
   */
  public static void writeJson(Writer writer, Object object) throws IOException {
    ObjectMapper om = ObjectMapperProvider.createCustomMapper();

    om.configure(SerializationConfig.Feature.INDENT_OUTPUT, true);
    om.configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false);

    writer.write(om.writeValueAsString(object));
    writer.write("\n");
    writer.flush();
  }

  /**
   * Writes object as JSON to the named file.
   * @param fileName path of the file to (over)write
   * @param object the object to write as JSON
   * @throws IOException if the file can't be written or the object serialized
   */
  public static void writeJson(String fileName, Object object) throws IOException {
    // the PrintWriter was previously never closed, leaking the file handle;
    // close it explicitly once the JSON has been written and flushed
    PrintWriter writer = new PrintWriter(fileName);
    try {
      JSONUtil.writeJson(writer, object);
    } finally {
      writer.close();
    }
  }

  /**
   * Reads and deserializes JSON from the given stream.
   * @param inputStream stream containing JSON
   * @param type target type for deserialization
   * @return the deserialized object
   * @throws IOException if the stream can't be read or parsed
   */
  public static Object readJson(InputStream inputStream, TypeReference type) throws IOException {
    ObjectMapper om = ClientObjectMapper.createCustomMapper();
    // tolerate unknown fields so newer server responses still parse
    om.getDeserializationConfig().set(DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES,
        false);
    return om.readValue(inputStream, type);
  }
}
package com.twitter.hraven.util;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.List;

import com.twitter.hraven.Constants;

/**
 * Utility class for string manipulation.
 */
public class StringUtil {

  private static final String SPACE = " ";
  private static final String UNDERSCORE = "_";

  /**
   * Takes a string token to be used as a key or qualifier and cleanses out reserved tokens. This
   * operation is not symetrical. Logic is to replace all spaces and separator characters with
   * underscores.
   *
   * @param token token to cleanse.
   * @return the cleansed token, or the input unchanged if it is null or empty
   */
  public static String cleanseToken(String token) {
    if (token == null || token.length() == 0) {
      return token;
    }

    String cleansed = token.replaceAll(SPACE, UNDERSCORE);
    cleansed = cleansed.replaceAll(Constants.SEP, UNDERSCORE);

    return cleansed;
  }

  /**
   * Builds up a String with the parameters for the filtering of fields.
   * Delegates to {@link #buildParam(String, String[])} so the formatting
   * logic lives in one place.
   *
   * @param paramName name repeated for each argument
   * @param paramArgs argument values, URL-encoded individually
   * @return String of the form {@code name=v1&name=v2...}
   * @throws IOException if an argument cannot be URL-encoded
   */
  public static String buildParam(String paramName, List<String> paramArgs)
      throws IOException {
    return buildParam(paramName, paramArgs.toArray(new String[paramArgs.size()]));
  }

  /**
   * Builds up a String with the parameters for the filtering of fields.
   *
   * @param paramName name repeated for each argument
   * @param paramArgs argument values, URL-encoded individually
   * @return String of the form {@code name=v1&name=v2...}
   * @throws IOException if an argument cannot be URL-encoded
   */
  public static String buildParam(String paramName, String[] paramArgs)
      throws IOException {
    StringBuilder sb = new StringBuilder();
    for (String arg : paramArgs) {
      if (sb.length() > 0) {
        sb.append("&");
      }
      sb.append(paramName).append("=").append(URLEncoder.encode(arg, "UTF-8"));
    }
    return sb.toString();
  }

}
package com.twitter.hraven;

import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.junit.runners.Suite.SuiteClasses;

import com.twitter.hraven.datasource.TestAppVersionService;
import com.twitter.hraven.datasource.TestFlowEventService;
import com.twitter.hraven.datasource.TestFlowQueueKeyConverter;
import com.twitter.hraven.datasource.TestJobHistoryRawService;
import com.twitter.hraven.datasource.TestJobHistoryService;
import com.twitter.hraven.util.TestBatchUtil;
import com.twitter.hraven.util.TestByteUtil;

/**
 * JUnit 4 suite that aggregates the core hRaven unit and datasource tests so
 * they can be run as a single target.
 *
 * NOTE(review): several Test* classes in this package (e.g. TestAppKey,
 * TestFlow, TestCounterMap) are not listed in this suite — confirm whether
 * the omission is intentional or the list has drifted.
 */
@RunWith(Suite.class)
@SuiteClasses({ TestFramework.class, TestJobDescFactoryBase.class,
    TestJobId.class, TestJobKey.class, TestJsonSerde.class,
    TestPigJobDescFactory.class, TestScaldingJobDescFactory.class,
    TestTaskKey.class, TestAppVersionService.class,
    TestFlowEventService.class, TestFlowQueueKeyConverter.class,
    TestJobHistoryRawService.class, TestJobHistoryService.class,
    TestBatchUtil.class, TestByteUtil.class })
public class AllTests {

}
package com.twitter.hraven;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.util.HashSet;
import java.util.Set;

import org.junit.Test;

/**
 * Round-trip test for the queue-set serialization on {@link AppSummary}.
 */
public class TestAppSummary {

  @Test
  public void testQueuesFromToString() {
    String serialized = "abc!xyz!hello" ;
    AppSummary summary = new AppSummary(new AppKey("cluster", "user", "appid"));

    Set<String> expected = new HashSet<String>();
    expected.add("abc");
    expected.add("xyz");
    expected.add("hello");

    // deserialize: the separator-joined string becomes a set of queue names
    summary.setQueuesFromString(serialized);
    Set<String> parsed = summary.getQueue();
    assertNotNull(parsed);
    assertEquals(3, parsed.size());
    assertEquals(expected, parsed);

    // serialize: the set joins back into a separator-delimited string
    // (order is unspecified, so check membership rather than exact text)
    String queues = summary.getQueuesAsString();
    String[] parts = queues.split(Constants.SEP);
    assertEquals(expected.size(), parts.length);
    for (String queue : parts) {
      assertTrue(expected.contains(queue));
    }
  }
}
package com.twitter.hraven;

import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

/**
 * Verifies that a {@link Flow} aggregates per-job totals and counters
 * (overall, map-phase, and reduce-phase) across the jobs added to it, and
 * that the flow's queue is taken from the first job added.
 */
public class TestFlow {
  private static final String CLUSTER = "cluster@dc";
  private static final String USER = "testuser";
  private static final String APP_ID = "testapp";
  private static final String QUEUE1 = "queue1";

  @Test
  public void testJobAggregation() {
    long runId = System.currentTimeMillis();
    // job1: 100 maps / 10 reduces, counters group1.key1=100, group2.key1=1000
    JobDetails job1 = new JobDetails(new JobKey(CLUSTER, USER, APP_ID, runId, "job_20120101000000_0001"));
    job1.setTotalMaps(100);
    job1.setTotalReduces(10);
    job1.setSubmitTime(runId);
    job1.setQueue(QUEUE1);
    CounterMap counters1 = new CounterMap();
    counters1.add(new Counter("group1", "key1", 100));
    counters1.add(new Counter("group2", "key1", 1000));
    job1.setCounters(counters1);

    // job2: 10 maps / 1 reduce, a different queue, counter group2.key2=1
    JobDetails job2 = new JobDetails(new JobKey(CLUSTER, USER, APP_ID, runId, "job_20120101000000_0002"));
    job2.setTotalMaps(10);
    job2.setTotalReduces(1);
    job2.setQueue(QUEUE1 + "2");
    job2.setSubmitTime(runId + 3600000L);
    CounterMap counters2 = new CounterMap();
    counters2.add(new Counter("group2", "key2", 1));
    job2.setCounters(counters2);

    // job3: 1000 maps / 10 reduces; the same CounterMap is installed as the
    // total, map, and reduce counters so phase aggregation can be checked
    JobDetails job3 = new JobDetails(new JobKey(CLUSTER, USER, APP_ID, runId, "job_20120101000000_0003"));
    job3.setTotalMaps(1000);
    job3.setTotalReduces(10);
    job3.setSubmitTime(runId + 4800000L);
    job3.setQueue(QUEUE1 + "3");
    CounterMap counters3 = new CounterMap();
    counters3.add(new Counter("group1", "key1", 50));
    counters3.add(new Counter("group2", "key1", 100));
    job3.setCounters(counters3);
    job3.setMapCounters(counters3);
    job3.setReduceCounters(counters3);

    Flow flow = new Flow(new FlowKey(CLUSTER, USER, APP_ID, runId));
    flow.addJob(job1);
    flow.addJob(job2);
    flow.addJob(job3);

    assertEquals(3, flow.getJobCount());
    // totalMaps = 100 + 10 + 1000
    assertEquals(1110, flow.getTotalMaps());
    // totalReduces = 10 + 1 + 10
    assertEquals(21, flow.getTotalReduces());
    // ensure the queue for the first job in the flow is set as queue for the flow
    assertTrue(QUEUE1.equals(flow.getQueue()));
    // total counters: group1, key1 = 100 + 50
    assertEquals(150, flow.getCounters().getCounter("group1", "key1").getValue());
    // total counters: group2, key1 = 1000 + 100
    assertEquals(1100, flow.getCounters().getCounter("group2", "key1").getValue());
    // total counters: group2, key2 = 1
    assertEquals(1, flow.getCounters().getCounter("group2", "key2").getValue());
    // map counters: group1, key1 = 50 (only job3 set map counters)
    assertEquals(50, flow.getMapCounters().getCounter("group1", "key1").getValue());
    // map counters: group2, key1 = 100
    assertEquals(100, flow.getMapCounters().getCounter("group2", "key1").getValue());
    // reduce counters: group1, key1 = 50 (only job3 set reduce counters)
    assertEquals(50, flow.getReduceCounters().getCounter("group1", "key1").getValue());
    // reduce counters: group2, key1 = 100
    assertEquals(100, flow.getReduceCounters().getCounter("group2", "key1").getValue());
  }
}
-------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/TestFlowKey.java: -------------------------------------------------------------------------------- 1 | package com.twitter.hraven; 2 | 3 | import org.junit.Test; 4 | 5 | import static junit.framework.Assert.assertEquals; 6 | 7 | public class TestFlowKey { 8 | @Test 9 | public void testToString() { 10 | FlowKey key = new FlowKey("c1@local", "auser", "app", 1345L); 11 | String expected = "c1@local" + Constants.SEP + "auser" 12 | + Constants.SEP + "app" + Constants.SEP + 1345L; 13 | assertEquals(expected, key.toString()); 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/TestFramework.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven; 17 | 18 | import static com.twitter.hraven.Framework.NONE; 19 | import static com.twitter.hraven.Framework.PIG; 20 | import static com.twitter.hraven.Framework.SCALDING; 21 | import static junit.framework.Assert.assertEquals; 22 | import static junit.framework.Assert.assertNotNull; 23 | import static junit.framework.Assert.assertTrue; 24 | 25 | import org.junit.Test; 26 | 27 | import com.twitter.hraven.Framework; 28 | 29 | /** 30 | * Test {@link Framework} 31 | */ 32 | public class TestFramework { 33 | 34 | /** 35 | * Test going back and forth between code and enum 36 | */ 37 | @Test 38 | public void testGetCode() { 39 | assertEquals(PIG, Framework.get(PIG.getCode())); 40 | assertEquals(SCALDING, Framework.get(SCALDING.getCode())); 41 | assertEquals(NONE, Framework.get(NONE.getCode())); 42 | } 43 | 44 | /** 45 | * Confirm descriptions are not null or empty. 46 | */ 47 | @Test 48 | public void getDescription() { 49 | assertNotNull(PIG.getDescription()); 50 | assertNotNull(SCALDING.getDescription()); 51 | assertNotNull(NONE.getDescription()); 52 | 53 | assertTrue("Description is not expected to be empty", PIG.getDescription().length() > 0); 54 | assertTrue("Description is not expected to be empty", SCALDING.getDescription().length() > 0); 55 | assertTrue("Description is not expected to be empty", NONE.getDescription().length() > 0); 56 | } 57 | 58 | 59 | } 60 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/TestHadoopVersion.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package com.twitter.hraven; 18 | 19 | import static org.junit.Assert.assertEquals; 20 | import static org.junit.Assert.assertNull; 21 | import static org.junit.Assert.assertTrue; 22 | 23 | import org.junit.Test; 24 | 25 | /** 26 | * test class for hadoop versions 27 | */ 28 | public class TestHadoopVersion { 29 | 30 | private enum ExpVersions { 31 | ONE, TWO 32 | } 33 | 34 | @Test 35 | public void checkVersions() { 36 | assertEquals(ExpVersions.values().length, HadoopVersion.values().length); 37 | for (HadoopVersion hv : HadoopVersion.values()) { 38 | assertTrue(ExpVersions.valueOf(hv.toString()) != null); 39 | } 40 | } 41 | 42 | @Test(expected=IllegalArgumentException.class) 43 | public void testNonExistentVersion() { 44 | assertNull(HadoopVersion.valueOf("DOES NOT EXIST")); 45 | } 46 | }; 47 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/TestJobDescFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertNull; 20 | 21 | import org.apache.hadoop.conf.Configuration; 22 | import org.junit.Test; 23 | 24 | public class TestJobDescFactory { 25 | @Test 26 | public void testCluster() { 27 | 28 | // load the test properties file 29 | Cluster.loadHadoopClustersProps("testhRavenClusters.properties"); 30 | 31 | Configuration c = new Configuration(false); 32 | c.set(JobDescFactory.JOBTRACKER_KEY, "cluster1.identifier1.example.com:8021"); 33 | String result = JobDescFactory.getCluster(c); 34 | assertEquals("cluster1@identifier1", result); 35 | 36 | c = new Configuration(false); 37 | c.set(JobDescFactory.JOBTRACKER_KEY, "hbase-cluster2.identifier2.example.com:8021"); 38 | result = JobDescFactory.getCluster(c); 39 | assertEquals("hbase-cluster2@identifier2", result); 40 | 41 | c = new Configuration(false); 42 | c.set(JobDescFactory.RESOURCE_MANAGER_KEY, "cluster2.identifier2.example.com:10020"); 43 | result = JobDescFactory.getCluster(c); 44 | assertEquals("cluster2@identifier2", result); 45 | 46 | c = new Configuration(false); 47 | c.set(JobDescFactory.JOBTRACKER_KEY, ""); 48 | result = JobDescFactory.getCluster(c); 49 | assertNull(result); 50 | 51 | c = new Configuration(false); 52 | result = JobDescFactory.getCluster(c); 53 | assertNull(result); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/TestJobDescFactoryBase.java: -------------------------------------------------------------------------------- 1 | package com.twitter.hraven; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.junit.Test; 5 | import static junit.framework.Assert.assertEquals; 6 | 7 | 8 | public class TestJobDescFactoryBase extends JobDescFactoryBase { 
  /** A job name containing a space and the hraven separator — unsafe as an app id. */
  public static final String UNSAFE_NAME = "soMe long" + Constants.SEP + "name";
  /** UNSAFE_NAME after unsafe characters have been replaced with underscores. */
  public static final String SAFE_NAME = "soMe_long_name";

  /**
   * Not interesting for this particular test; stubbed out.
   * @param qualifiedJobId
   * @param submitTimeMillis
   * @param jobConf
   * @return always null
   */
  JobDesc create(QualifiedJobId qualifiedJobId, long submitTimeMillis,
      Configuration jobConf) {
    // Not interesting for this test.
    return null;
  }

  /**
   * Identity transform, so getAppId output can be compared directly
   * against the configured name.
   * @param jobName
   * @return jobName unchanged
   */
  String getAppIdFromJobName(String jobName) {
    // Identity transform.
    return jobName;
  }

  /**
   * Test the method to get the app ID from the JobConf:
   * unsafe characters in the configured name must be sanitized.
   */
  @Test
  public void testgetAppId() {
    Configuration conf = new Configuration();
    conf.set(Constants.APP_NAME_CONF_KEY, UNSAFE_NAME);
    assertEquals(SAFE_NAME, getAppId(conf));
  }

  /**
   * Test the method to get the app ID
   * from a hadoop2 JobConf when the hadoop1 keys are blank.
   */
  @Test
  public void testgetAppIdHadoop2() {
    Configuration conf = new Configuration();
    // ensure hadoop1 config key is blank
    conf.set(Constants.JOB_NAME_CONF_KEY, "");
    // ensure batch.desc is blank
    conf.set(Constants.APP_NAME_CONF_KEY, "");
    // set the hadoop2 config key
    conf.set(Constants.JOB_NAME_HADOOP2_CONF_KEY, "abc.def.xyz");
    assertEquals("abc.def.xyz", getAppId(conf));
  }

}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | import org.apache.hadoop.conf.Configuration; 19 | import org.junit.Assert; 20 | import org.junit.Test; 21 | 22 | public class TestMRJobDescFactory { 23 | 24 | @Test 25 | public void testCreate() { 26 | MRJobDescFactory mrFac = new MRJobDescFactory(); 27 | Configuration conf = new Configuration(); 28 | conf.set(Constants.USER_CONF_KEY, "testuser"); 29 | QualifiedJobId qid = new QualifiedJobId("clusterId", "job_211212010355_45240"); 30 | 31 | JobDesc jd = null; 32 | 33 | // batch.desc and mapred.job.name are not set 34 | jd = mrFac.create(qid, 1354772953639L, conf); 35 | Assert.assertEquals(jd.getAppId(), Constants.UNKNOWN); 36 | 37 | // batch.desc is not set, but mapred.job.name is set 38 | String name = "Crazy Job name! : test 1 2 3!"; 39 | String processedName = "Crazy_Job_name__:_test_1_2_3_"; 40 | conf.set("mapred.job.name", name); 41 | jd = mrFac.create(qid, 1354772953639L, conf); 42 | Assert.assertEquals(jd.getAppId(), processedName); 43 | 44 | // batch.desc is set and mapred.job.name is set 45 | name = "Other Crazy Job name! : test 1 2 3!"; 46 | processedName = "Other_Crazy_Job_name__:_test_1_2_3_"; 47 | conf.set("batch.desc", name); 48 | jd = mrFac.create(qid, 1354772953639L, conf); 49 | Assert.assertEquals(jd.getAppId(), processedName); 50 | 51 | // batch.desc is set set, and mapred.job.name is not set 52 | conf = new Configuration(); 53 | conf.set(Constants.USER_CONF_KEY, "testuser"); 54 | name = "Third Crazy Job name! 
: test 1 2 3!"; 55 | processedName = "Third_Crazy_Job_name__:_test_1_2_3_"; 56 | conf.set("batch.desc", name); 57 | jd = mrFac.create(qid, 1354772953639L, conf); 58 | Assert.assertEquals(jd.getAppId(), processedName); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/TestPigJobDescFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | package com.twitter.hraven; 17 | 18 | import static junit.framework.Assert.assertEquals; 19 | 20 | import org.junit.Test; 21 | 22 | import com.twitter.hraven.PigJobDescFactory; 23 | 24 | public class TestPigJobDescFactory { 25 | 26 | String[][] testJobNames = { 27 | { null, null }, 28 | { "foo", "foo" }, 29 | { "PigLatin:daily_job:daily_2012/06/22-00:00:00_to_2012/06/23-00:00:00", 30 | PigJobDescFactory.SCHEDULED_PREFIX + "daily_job:daily" }, 31 | { "PigLatin:hourly_job:hourly_2012/06/24-08:00:00_to_2012/06/24-09:00:00", 32 | PigJobDescFactory.SCHEDULED_PREFIX + "hourly_job:hourly" }, 33 | { "PigLatin:hourly_foo:hourly:foo_2012/06/24-08:00:00_to_2012/06/24-09:00:00", 34 | PigJobDescFactory.SCHEDULED_PREFIX + "hourly_foo:hourly:foo" }, 35 | { "PigLatin:regular_job.pig", "PigLatin:regular_job.pig" } 36 | }; 37 | 38 | Object[][] testLogFileNames = { 39 | { null, 0L }, 40 | { "/var/log/pig/pig_1340659035863.log", 1340659035863L }, 41 | { "/var/log/pig/pig_log.log", 0L }, 42 | }; 43 | 44 | @Test 45 | public void testJobNameToBatchDesc() { 46 | PigJobDescFactory pigFactory = new PigJobDescFactory(); 47 | for (String[] inputOuput : testJobNames) { 48 | String input = inputOuput[0]; 49 | String expected = inputOuput[1]; 50 | 51 | String found = pigFactory.getAppIdFromJobName(input); 52 | assertEquals("Unexpected result found when parsing jobName=" + input, expected, found); 53 | } 54 | } 55 | 56 | @Test 57 | public void testLogFileToStartTime() { 58 | 59 | for (Object[] inputOuput : testLogFileNames) { 60 | String input = (String)inputOuput[0]; 61 | long expected = (Long)inputOuput[1]; 62 | 63 | long found = PigJobDescFactory.getScriptStartTimeFromLogfileName(input); 64 | assertEquals("Unexpected result found when parsing logFileName=" + input, expected, found); 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/TestQualifiedPathKey.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2014 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven; 17 | 18 | import org.junit.Test; 19 | 20 | import static org.junit.Assert.assertEquals; 21 | import static org.junit.Assert.assertNotNull; 22 | import static org.junit.Assert.assertNull; 23 | 24 | /** 25 | * tests the {@link HdfsStatsKeyConverter} class 26 | */ 27 | public class TestQualifiedPathKey { 28 | 29 | private static final String cluster1 = "cluster1"; 30 | private static final String path1 = "path1"; 31 | private static final String namespace1 = "namespace1"; 32 | private static final String namespace2 = "namespace2"; 33 | 34 | @Test 35 | public void testConstructor1() throws Exception { 36 | QualifiedPathKey key1 = new QualifiedPathKey(cluster1, path1); 37 | testKeyComponents(key1); 38 | assertNull(key1.getNamespace()); 39 | } 40 | 41 | @Test 42 | public void testConstructor2() throws Exception { 43 | QualifiedPathKey key1 = new QualifiedPathKey(cluster1, path1, namespace1); 44 | testKeyComponents(key1); 45 | assertNotNull(key1.getNamespace()); 46 | assertEquals(key1.getNamespace(), namespace1); 47 | } 48 | 49 | @Test 50 | public void testEquality() throws Exception { 51 | QualifiedPathKey key1 = new QualifiedPathKey(cluster1, path1); 52 | QualifiedPathKey key2 = new QualifiedPathKey(cluster1, path1); 53 | 
assertEquals(key1.compareTo(key2), 0); 54 | assertEquals(key1.hashCode(), key2.hashCode()); 55 | assertEquals(key1, key2); 56 | } 57 | 58 | @Test 59 | public void testInEqualityWithNamespace() throws Exception { 60 | // keep only the namespace name different 61 | QualifiedPathKey key1 = new QualifiedPathKey(cluster1, path1, namespace1); 62 | QualifiedPathKey key2 = new QualifiedPathKey(cluster1, path1, namespace2); 63 | assertEquals(key1.compareTo(key2), -1); 64 | } 65 | 66 | @Test 67 | public void testNullHashCode() throws Exception { 68 | QualifiedPathKey key1 = new QualifiedPathKey(null, null); 69 | QualifiedPathKey key2 = new QualifiedPathKey(" ", " "); 70 | assertEquals(key1.hashCode(), key2.hashCode()); 71 | } 72 | 73 | private void testKeyComponents(QualifiedPathKey key1) { 74 | assertNotNull(key1); 75 | assertEquals(key1.getCluster(), cluster1); 76 | assertEquals(key1.getPath(), path1); 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/TestTaskKey.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
package com.twitter.hraven;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import com.twitter.hraven.datasource.TaskKeyConverter;

import static org.junit.Assert.assertEquals;

/**
 * Test usage and serialization of TaskKey
 */
public class TestTaskKey {
  // used to record the generated "bad" run id for debugging
  private static Log LOG = LogFactory.getLog(TestTaskKey.class);

  /**
   * Round-trips a TaskKey through TaskKeyConverter and verifies every
   * component survives — including a run id whose encoded form contains
   * the key-separator bytes.
   */
  @Test
  public void testSerialization() {
    TaskKeyConverter conv = new TaskKeyConverter();

    TaskKey key1 = new TaskKey(
        new JobKey("test@local", "testuser", "app", 1234L, "job_20120101000000_1111"), "m_001");
    assertEquals("test@local", key1.getCluster());
    assertEquals("testuser", key1.getUserName());
    assertEquals("app", key1.getAppId());
    assertEquals(1234L, key1.getRunId());
    assertEquals("job_20120101000000_1111", key1.getJobId().getJobIdString());
    assertEquals("m_001", key1.getTaskId());

    // first round trip: serialize then deserialize
    byte[] key1Bytes = conv.toBytes(key1);
    TaskKey key2 = conv.fromBytes(key1Bytes);
    assertKey(key1, key2);

    // a second round trip must be stable as well
    TaskKey key3 = conv.fromBytes( conv.toBytes(key2) );
    assertKey(key1, key3);

    // test with a run ID containing the separator
    long now = System.currentTimeMillis();
    byte[] encoded = Bytes.toBytes(Long.MAX_VALUE - now);
    // replace last byte with separator and reconvert to long, producing a
    // run id whose inverted encoding embeds the separator bytes
    Bytes.putBytes(encoded, encoded.length-Constants.SEP_BYTES.length,
        Constants.SEP_BYTES, 0, Constants.SEP_BYTES.length);
    long badId = Long.MAX_VALUE - Bytes.toLong(encoded);
    LOG.info("Bad run ID is " + badId);

    // the embedded separator must not break key parsing
    TaskKey badKey1 = new TaskKey(
        new JobKey(key1.getQualifiedJobId(), key1.getUserName(), key1.getAppId(), badId),
        key1.getTaskId());
    byte[] badKeyBytes = conv.toBytes(badKey1);
    TaskKey badKey2 = conv.fromBytes(badKeyBytes);
    assertKey(badKey1, badKey2);
  }

  /** toString should append the task id to the job key with the separator. */
  @Test
  public void testToString() {
    JobKey jKey = new JobKey("test@local", "testuser", "app", 1234L, "job_20120101000000_1111");
    TaskKey key = new TaskKey(jKey, "m_001");
    String expected = jKey.toString() + Constants.SEP + "m_001";
    assertEquals(expected, key.toString());
  }

  /** Field-by-field comparison of two task keys, including hash codes. */
  private void assertKey(TaskKey expected, TaskKey actual) {
    assertEquals(expected.getCluster(), actual.getCluster());
    assertEquals(expected.getUserName(), actual.getUserName());
    assertEquals(expected.getAppId(), actual.getAppId());
    assertEquals(expected.getRunId(), actual.getRunId());
    assertEquals(expected.getJobId(), actual.getJobId());
    assertEquals(expected.getTaskId(), actual.getTaskId());
    assertEquals(expected.hashCode(),actual.hashCode());
  }
}
15 | */ 16 | package com.twitter.hraven.datasource; 17 | 18 | import com.twitter.hraven.Flow; 19 | import com.twitter.hraven.FlowQueueKey; 20 | import com.twitter.hraven.datasource.FlowQueueKeyConverter; 21 | 22 | import org.junit.Test; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | import static org.junit.Assert.assertNotNull; 26 | 27 | /** 28 | */ 29 | public class TestFlowQueueKeyConverter { 30 | @Test 31 | public void testFlowQueueKey() throws Exception { 32 | FlowQueueKeyConverter conv = new FlowQueueKeyConverter(); 33 | 34 | long now = System.currentTimeMillis(); 35 | FlowQueueKey key1 = new FlowQueueKey("test@test", Flow.Status.RUNNING, now, "flow1"); 36 | 37 | byte[] key1Bytes = conv.toBytes(key1); 38 | FlowQueueKey key2 = conv.fromBytes(key1Bytes); 39 | assertNotNull(key2); 40 | assertEquals(key1.getCluster(), key2.getCluster()); 41 | assertEquals(key1.getStatus(), key2.getStatus()); 42 | assertEquals(key1.getTimestamp(), key2.getTimestamp()); 43 | assertEquals(key1.getFlowId(), key2.getFlowId()); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/rest/TestPaginatedResult.java: -------------------------------------------------------------------------------- 1 | package com.twitter.hraven.rest; 2 | import static junit.framework.Assert.assertEquals; 3 | import static junit.framework.Assert.assertNotNull; 4 | import static junit.framework.Assert.assertNull; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | import org.apache.hadoop.hbase.util.Bytes; 8 | import org.junit.Test; 9 | 10 | /** 11 | * Unit tests for the PaginatedResult class 12 | */ 13 | 14 | public class TestPaginatedResult { 15 | 16 | private final int INTEGER_PAGE_LIMIT = 10; 17 | 18 | @Test 19 | public void checkOnePageResults() { 20 | PaginatedResult pageOfInts = new PaginatedResult(INTEGER_PAGE_LIMIT); 21 | assertNotNull(pageOfInts); 22 | assertEquals(pageOfInts.getLimit(), 
INTEGER_PAGE_LIMIT); 23 | List actualValues = new ArrayList(); 24 | populateListOfInts(actualValues, INTEGER_PAGE_LIMIT); 25 | pageOfInts.setValues(actualValues); 26 | List expectedValues = new ArrayList(); 27 | populateListOfInts(expectedValues, INTEGER_PAGE_LIMIT); 28 | assertEquals(actualValues.size(), pageOfInts.getLimit()); 29 | assertEquals(expectedValues.size(), pageOfInts.getLimit()); 30 | assertNull(pageOfInts.getNextStartRow()); 31 | assertEquals(expectedValues, pageOfInts.getValues()); 32 | } 33 | 34 | @Test 35 | public void checkMultiplePageResults() { 36 | final int EXTRA_RESULTS = 1; 37 | final Integer NEXT_START_ROW = (INTEGER_PAGE_LIMIT + 1) * INTEGER_PAGE_LIMIT; 38 | PaginatedResult pageOfInts = new PaginatedResult(INTEGER_PAGE_LIMIT); 39 | assertNotNull(pageOfInts); 40 | assertEquals(pageOfInts.getLimit(), INTEGER_PAGE_LIMIT); 41 | List actualValues = new ArrayList(); 42 | populateListOfInts(actualValues, INTEGER_PAGE_LIMIT + EXTRA_RESULTS); 43 | pageOfInts.setValues(actualValues.subList(0, INTEGER_PAGE_LIMIT)); 44 | List expectedValues = new ArrayList(); 45 | populateListOfInts(expectedValues, INTEGER_PAGE_LIMIT); 46 | pageOfInts.setNextStartRow(Bytes.toBytes(actualValues.get( INTEGER_PAGE_LIMIT))); 47 | assertEquals(actualValues.size(), pageOfInts.getLimit() + EXTRA_RESULTS); 48 | assertEquals(expectedValues.size(), pageOfInts.getLimit()); 49 | assertNotNull(pageOfInts.getNextStartRow()); 50 | assertEquals(NEXT_START_ROW.intValue(), Bytes.toInt(pageOfInts.getNextStartRow())); 51 | assertEquals(expectedValues, pageOfInts.getValues()); 52 | } 53 | 54 | @Test 55 | public void checkLessThanOnePageResults() { 56 | final int LESS_THAN_ONE_PAGE = INTEGER_PAGE_LIMIT / 2; 57 | PaginatedResult pageOfInts = new PaginatedResult(INTEGER_PAGE_LIMIT); 58 | assertNotNull(pageOfInts); 59 | assertEquals(pageOfInts.getLimit(), INTEGER_PAGE_LIMIT); 60 | List actualValues = new ArrayList(); 61 | populateListOfInts(actualValues, LESS_THAN_ONE_PAGE); 62 | 
pageOfInts.setValues(actualValues); 63 | List expectedValues = new ArrayList(); 64 | populateListOfInts(expectedValues, LESS_THAN_ONE_PAGE); 65 | assertEquals(LESS_THAN_ONE_PAGE, pageOfInts.getValues().size()); 66 | assertNull(pageOfInts.getNextStartRow()); 67 | assertEquals(expectedValues, pageOfInts.getValues()); 68 | 69 | } 70 | 71 | private void populateListOfInts(List inputValues, int limit) { 72 | for (int i = 1; i <= limit; i++) { 73 | inputValues.add(i * INTEGER_PAGE_LIMIT); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /hraven-core/src/test/java/com/twitter/hraven/util/TestHadoopConfUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package com.twitter.hraven.util; 18 | 19 | import static org.junit.Assert.assertEquals; 20 | import static org.junit.Assert.assertFalse; 21 | import static org.junit.Assert.assertNull; 22 | import static org.junit.Assert.assertTrue; 23 | import java.io.FileInputStream; 24 | import java.io.FileNotFoundException; 25 | import org.apache.hadoop.conf.Configuration; 26 | import org.junit.Test; 27 | import com.twitter.hraven.Constants; 28 | 29 | public class TestHadoopConfUtil { 30 | 31 | @Test 32 | public void testContains() throws FileNotFoundException { 33 | final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml"; 34 | Configuration jobConf = new Configuration(); 35 | jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME)); 36 | assertTrue(HadoopConfUtil.contains(jobConf, 37 | Constants.USER_CONF_KEY_HADOOP2)); 38 | assertFalse(HadoopConfUtil.contains(jobConf, Constants.USER_CONF_KEY)); 39 | } 40 | 41 | @Test 42 | public void testGetUserNameInConf() throws FileNotFoundException { 43 | final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml"; 44 | Configuration jobConf = new Configuration(); 45 | jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME)); 46 | String userName = HadoopConfUtil.getUserNameInConf(jobConf); 47 | assertEquals(userName, "user"); 48 | } 49 | 50 | @Test 51 | public void testGetQueueName() throws FileNotFoundException { 52 | final String JOB_CONF_FILE_NAME = "src/test/resources/job_1329348432655_0001_conf.xml"; 53 | Configuration jobConf = new Configuration(); 54 | jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME)); 55 | String queueName = HadoopConfUtil.getQueueName(jobConf); 56 | assertEquals(queueName, "default"); 57 | } 58 | 59 | @Test(expected=IllegalArgumentException.class) 60 | public void checkUserNameAlwaysSet() throws FileNotFoundException { 61 | final String JOB_CONF_FILE_NAME = 62 | "src/test/resources/job_1329348432655_0001_conf.xml"; 63 
| 64 | Configuration jobConf = new Configuration(); 65 | jobConf.addResource(new FileInputStream(JOB_CONF_FILE_NAME)); 66 | 67 | // unset the user name to confirm exception thrown 68 | jobConf.set(Constants.USER_CONF_KEY_HADOOP2, ""); 69 | jobConf.set(Constants.USER_CONF_KEY, ""); 70 | // test the hraven user name setting 71 | String hRavenUserName = HadoopConfUtil.getUserNameInConf(jobConf); 72 | assertNull(hRavenUserName); 73 | } 74 | } -------------------------------------------------------------------------------- /hraven-core/src/test/resources/done/something.example.com_1337787092259_job_201205231531_256984_userName1_App1: -------------------------------------------------------------------------------- 1 | Meta VERSION="1" . 2 | Job JOBID="job_201205231531_256984" JOBNAME="pqrs:abc_something:xyz" USER="user1234" SUBMIT_TIME="1338958320124" JOBCONF="hdfs://something\.example\.com/user/user1234/\.staging/job_201205231531_256984/job\.xml" VIEW_JOB="*" MODIFY_JOB="*" JOB_QUEUE="default" . 3 | Job JOBID="job_201205231531_256984" JOB_PRIORITY="NORMAL" . 4 | Job JOBID="job_201205231531_256984" JOB_STATUS="RUNNING" . 
5 | -------------------------------------------------------------------------------- /hraven-core/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootCategory=INFO,console 2 | 3 | # 4 | # console 5 | # Add "console" to rootlogger above if you want to use this 6 | # 7 | log4j.appender.console=org.apache.log4j.ConsoleAppender 8 | log4j.appender.console.target=System.err 9 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 10 | log4j.appender.console.layout.ConversionPattern=%d %-5p [%t] %C{2}(%L): %m%n 11 | 12 | # Custom Logging levels 13 | 14 | #log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG 15 | 16 | log4j.logger.org.apache.hadoop=WARN 17 | log4j.logger.org.apache.zookeeper=ERROR 18 | log4j.logger.org.apache.hadoop.hbase=INFO 19 | 20 | log4j.logger.com.twitter.hraven=DEBUG 21 | -------------------------------------------------------------------------------- /hraven-core/src/test/resources/testhRavenClusters.properties: -------------------------------------------------------------------------------- 1 | #This property file is used to map the jobtracker address from a configuration file to a cluster identifier. 2 | cluster1.identifier1.example.com=cluster1@identifier1 3 | cluster2.identifier2.example.com=cluster2@identifier2 4 | hbase-cluster2.identifier2.example.com=hbase-cluster2@identifier2 5 | -------------------------------------------------------------------------------- /hraven-etl/src/main/java/com/twitter/hraven/etl/FileStatusModificationComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
/*
Copyright 2012 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.twitter.hraven.etl;

import java.util.Comparator;

import org.apache.hadoop.fs.FileStatus;

/**
 * Orders {@link FileStatus} instances by ascending modification time, with a
 * null status sorting before any non-null one.
 */
public class FileStatusModificationComparator implements
    Comparator<FileStatus> {

  /**
   * Default constructor.
   */
  public FileStatusModificationComparator() {
  }

  /**
   * Compares by modification time, nulls first.
   *
   * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
   */
  public int compare(FileStatus fileStatus1, FileStatus fileStatus2) {

    // Do the obligatory null checks: null sorts before everything else.
    if ((fileStatus1 == null) && (fileStatus2 == null)) {
      return 0;
    }
    if (fileStatus1 == null) {
      return -1;
    }
    if (fileStatus2 == null) {
      return 1;
    }

    long modificationTime1 = fileStatus1.getModificationTime();
    long modificationTime2 = fileStatus2.getModificationTime();

    // Explicit comparisons instead of the nested ternary; also avoids any
    // overflow-prone subtraction idiom.
    if (modificationTime1 < modificationTime2) {
      return -1;
    }
    if (modificationTime1 > modificationTime2) {
      return 1;
    }
    return 0;
  }

}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.etl; 17 | 18 | /** 19 | */ 20 | public class ImportException extends RuntimeException { 21 | 22 | private static final long serialVersionUID = 2312684791991178660L; 23 | 24 | public ImportException(String message) { 25 | super(message); 26 | } 27 | 28 | public ImportException(String message, Throwable cause) { 29 | super(message, cause); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /hraven-etl/src/main/java/com/twitter/hraven/etl/JobFilePathFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.etl; 17 | 18 | import org.apache.hadoop.fs.Path; 19 | import org.apache.hadoop.fs.PathFilter; 20 | 21 | /** 22 | * {@link PathFilter} that accepts only job conf or job history files. 
23 | * 24 | */ 25 | public class JobFilePathFilter implements PathFilter { 26 | 27 | /** 28 | * Default constructor. 29 | */ 30 | public JobFilePathFilter() { 31 | } 32 | 33 | /* 34 | * Accept only those paths that are either job confs or job history files. 35 | * 36 | * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path) 37 | */ 38 | @Override 39 | public boolean accept(Path path) { 40 | // Ideally we want to do this 41 | // JobFile jobFile = new JobFile(path.getName()); 42 | // return (jobFile.isJobConfFile() || jobFile.isJobHistoryFile()); 43 | // Aside from that not being efficient, it also chokes on input directories. 44 | 45 | // therefore, allow anythying but CRC files. The record reader will have to deal with the rest. 46 | return !((path == null) || (path.getName().endsWith(".crc"))); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /hraven-etl/src/main/java/com/twitter/hraven/etl/JobHistoryFileParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
*/
package com.twitter.hraven.etl;

import java.util.List;
import org.apache.hadoop.hbase.client.Put;

import com.twitter.hraven.JobDetails;
import com.twitter.hraven.JobKey;
import com.twitter.hraven.datasource.ProcessingException;

/**
 * Interface for job history file parsing. Should be implemented for parsing
 * different formats of history files: pre and post the change by
 * MAPREDUCE-1016, i.e. hadoop1.0 as well as hadoop2.0.
 */
public interface JobHistoryFileParser {

  /**
   * Parses the history file and populates the job and task puts.
   *
   * @param historyFile raw contents of the job history file
   * @param jobKey key of the job this history file belongs to
   * @throws ProcessingException if the file cannot be parsed
   */
  public void parse(byte[] historyFile, JobKey jobKey);

  /**
   * Calculates the megabyte-millis taken up by this job. Should be called
   * after {@link JobHistoryFileParser#parse(byte[], JobKey)} since the values
   * it needs for the calculation are populated in the parser object while
   * parsing.
   *
   * @return megabyte-millis for the job
   */
  public Long getMegaByteMillis();

  /**
   * Return the generated list of job puts assembled when the history file is
   * parsed.
   *
   * @return a list of job-level {@link Put}s
   */
  public List<Put> getJobPuts();

  /**
   * Return the generated list of task puts assembled when the history file is
   * parsed.
   *
   * @return a list of task-level {@link Put}s
   */
  public List<Put> getTaskPuts();

  /**
   * Get the {@link JobDetails} object for this history file so that it can be
   * used for storing the aggregation summary.
   *
   * @return JobDetails
   */
  public JobDetails getJobDetails();
}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.twitter.hraven.etl;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import com.twitter.hraven.HadoopVersion;

/**
 * Deal with {@link JobHistoryFileParser} implementations. Creates an
 * appropriate Job History File Parser object based on the type of job history
 * file.
 */
public class JobHistoryFileParserFactory {

  /**
   * NOTE that this version string is a replica of
   * {@link org.apache.hadoop.mapreduce.jobhistory.EventWriter}. Since that
   * class is not public, the VERSION variable there becomes package-level
   * visible and hence we need a replica.
   */
  public static final String HADOOP2_VERSION_STRING = "Avro-Json";
  /** Length in bytes of {@link #HADOOP2_VERSION_STRING}. */
  private static final int HADOOP2_VERSION_LENGTH = 9;
  /** How much of an unrecognized file to quote in the error message. */
  private static final int ERROR_SNIPPET_LENGTH = 100;

  /**
   * Determines the version of hadoop that the history file belongs to.
   *
   * Newer job history files carry "Avro-Json" as the signature at the start
   * of the file; see MAPREDUCE-1016 for the format change.
   *
   * @param historyFileContents raw bytes of the history file
   * @return {@link HadoopVersion#TWO} for post MAPREDUCE-1016 history files
   * @throws IllegalArgumentException if no version signature matches
   */
  public static HadoopVersion getVersion(byte[] historyFileContents) {
    if (historyFileContents.length > HADOOP2_VERSION_LENGTH) {
      // the first 9 bytes in a hadoop2.0 history file contain "Avro-Json"
      String version2Part =
          new String(historyFileContents, 0, HADOOP2_VERSION_LENGTH);
      if (StringUtils.equalsIgnoreCase(version2Part, HADOOP2_VERSION_STRING)) {
        return HadoopVersion.TWO;
      }
    }
    // Quote a readable prefix of the file: concatenating the byte[] itself
    // would only print its identity hash code, not its contents.
    int snippetLength =
        Math.min(historyFileContents.length, ERROR_SNIPPET_LENGTH);
    throw new IllegalArgumentException(" Unknown format of job history file: "
        + new String(historyFileContents, 0, snippetLength));
  }

  /**
   * Creates an instance of {@link JobHistoryFileParserHadoop2} that can parse
   * post MAPREDUCE-1016 job history files.
   *
   * @param historyFileContents history file contents
   * @param jobConf job configuration handed to the parser
   * @return an object that can parse job history files
   * @throws IllegalArgumentException if the contents are null or of an
   *           unknown format
   */
  public static JobHistoryFileParser createJobHistoryFileParser(
      byte[] historyFileContents, Configuration jobConf)
      throws IllegalArgumentException {

    if (historyFileContents == null) {
      throw new IllegalArgumentException(
          "Job history contents should not be null");
    }

    HadoopVersion version = getVersion(historyFileContents);

    switch (version) {
    case TWO:
      return new JobHistoryFileParserHadoop2(jobConf);

    default:
      throw new IllegalArgumentException(
          " Unknown format of job history file ");
    }
  }

  /**
   * @return the history file version constant for hadoop 2
   */
  public static HadoopVersion getHistoryFileVersion2() {
    return HadoopVersion.TWO;
  }
}
/*
Copyright 2012 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.twitter.hraven.etl;

import java.util.concurrent.Callable;

import org.apache.hadoop.mapreduce.Job;

/**
 * Can be used to run a single Hadoop job. The {@link #call()} method will
 * block until the job is complete and will return a non-null return value
 * indicating the success of the Hadoop job.
 */
public class JobRunner implements Callable<Boolean> {

  // NOTE(review): the check-then-set on this flag is not atomic; looks like
  // single-threaded use is assumed — confirm before sharing across threads.
  private volatile boolean isCalled = false;
  private final Job job;

  /**
   * Post processing step that gets called upon successful completion of the
   * Hadoop job.
   */
  private final Callable<Boolean> postProcessor;

  /**
   * Constructor
   *
   * @param job
   *          the job to run in the call method.
   * @param postProcessor
   *          Post processing step that gets called upon successful completion
   *          of the Hadoop job. Can be null, in which case it will be
   *          skipped. Final results will be the return value of this final
   *          processing step.
   */
  public JobRunner(Job job, Callable<Boolean> postProcessor) {
    this.job = job;
    this.postProcessor = postProcessor;
  }

  /*
   * (non-Javadoc)
   *
   * @see java.util.concurrent.Callable#call()
   */
  @Override
  public Boolean call() throws Exception {

    // Guard to make sure we get called only once.
    if (isCalled) {
      return false;
    } else {
      isCalled = true;
    }

    if (job == null) {
      return false;
    }

    boolean success = false;
    // Schedule the job on the JobTracker and wait for it to complete.
    try {
      success = job.waitForCompletion(true);
    } catch (InterruptedException interrupted) {
      // We're told to stop, so honor that and restore the interrupt status.
      Thread.currentThread().interrupt();
      // Indicate that we should NOT run the postProcessor.
      success = false;
    }

    if (success && (postProcessor != null)) {
      success = postProcessor.call();
    }

    return success;
  }

}
15 | */ 16 | package com.twitter.hraven.etl; 17 | 18 | /** 19 | */ 20 | public class ProcessRecordKey { 21 | private final String cluster; 22 | private final long timestamp; 23 | 24 | public ProcessRecordKey(String cluster, long timestamp) { 25 | this.cluster = cluster; 26 | this.timestamp = timestamp; 27 | } 28 | 29 | public String getCluster() { 30 | return cluster; 31 | } 32 | 33 | public long getTimestamp() { 34 | return timestamp; 35 | } 36 | 37 | @Override 38 | public boolean equals(Object other) { 39 | if (other != null && other instanceof ProcessRecordKey) { 40 | return cluster.equals(((ProcessRecordKey) other).getCluster()) && 41 | timestamp == ((ProcessRecordKey) other).getTimestamp(); 42 | } 43 | return false; 44 | } 45 | 46 | public String toString() { 47 | return new StringBuilder("ProcessRecordKey[cluster=") 48 | .append(cluster) 49 | .append(", timestamp=") 50 | .append(timestamp) 51 | .append("]") 52 | .toString(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /hraven-etl/src/main/java/com/twitter/hraven/etl/ProcessRecordKeyConverter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
*/
package com.twitter.hraven.etl;

import org.apache.hadoop.hbase.util.Bytes;
import com.twitter.hraven.Constants;
import com.twitter.hraven.datasource.ByteConverter;
import com.twitter.hraven.util.ByteUtil;

/**
 * Converts {@link ProcessRecordKey} instances to and from their HBase row key
 * form: cluster + separator + inverted timestamp, so that more recent records
 * sort first.
 */
public class ProcessRecordKeyConverter
    implements ByteConverter<ProcessRecordKey> {

  @Override
  public byte[] toBytes(ProcessRecordKey key) {
    // invert the timestamp so newer records sort ahead of older ones
    long invertedTimestamp = Long.MAX_VALUE - key.getTimestamp();
    return ByteUtil.join(Constants.SEP_BYTES,
        Bytes.toBytes(key.getCluster()),
        Bytes.toBytes(invertedTimestamp));
  }

  @Override
  public ProcessRecordKey fromBytes(byte[] bytes) {
    byte[][] parts = ByteUtil.split(bytes, Constants.SEP_BYTES, 2);
    // undo the inversion applied in toBytes()
    long invertedTimestamp = Bytes.toLong(parts[1]);
    return new ProcessRecordKey(Bytes.toString(parts[0]),
        Long.MAX_VALUE - invertedTimestamp);
  }
}
*/
package com.twitter.hraven.etl;

import java.io.IOException;
import java.util.concurrent.Callable;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

/**
 * Updates a processRecord to the given status when called.
 */
public class ProcessRecordUpdater implements Callable<Boolean> {

  /** The record to be updated. */
  private final ProcessRecord processRecord;

  /** The new state to set the record to using the service. */
  private final ProcessState newState;

  /** Used to connect to HBase. */
  private final Configuration hbaseConf;

  /**
   * @param hBaseconf used to connect to HBase
   * @param processRecord the record to update
   * @param newState the state to transition the record to
   * @throws IOException
   */
  public ProcessRecordUpdater(Configuration hBaseconf,
      ProcessRecord processRecord, ProcessState newState) throws IOException {
    this.hbaseConf = hBaseconf;
    this.processRecord = processRecord;
    this.newState = newState;
  }

  /*
   * (non-Javadoc)
   *
   * @see java.util.concurrent.Callable#call()
   */
  @Override
  public Boolean call() throws Exception {

    ProcessRecord updatedRecord = null;
    Connection hbaseConnection = null;
    try {
      // Connect only when needed, and always close the connection.
      hbaseConnection = ConnectionFactory.createConnection(hbaseConf);
      ProcessRecordService processRecordService =
          new ProcessRecordService(hbaseConf, hbaseConnection);

      updatedRecord =
          processRecordService.setProcessState(processRecord, newState);
    } finally {
      if (hbaseConnection != null) {
        hbaseConnection.close();
      }
    }

    // success only if the service reports the record in the requested state
    return (updatedRecord != null)
        && (updatedRecord.getProcessState() == newState);
  }

}
The record will now also have a min and 40 | * a max job ID processed. 41 | */ 42 | LOADED(2), 43 | 44 | /** 45 | * All job files between the min and the max job ID for a given cluster are 46 | * processed. 47 | */ 48 | PROCESSED(3); 49 | 50 | /** 51 | * Representing this state. 52 | */ 53 | private final int code; 54 | 55 | private ProcessState(int code) { 56 | this.code = code; 57 | } 58 | 59 | /** 60 | * @return the code for this state 61 | */ 62 | public int getCode() { 63 | return code; 64 | } 65 | 66 | /** 67 | * @param code 68 | * representing the state 69 | * @return the ProcessState for this code, or if not recognized, then return 70 | * {@link ProcessState#CREATED} 71 | */ 72 | public static ProcessState getProcessState(int code) { 73 | for (ProcessState state : ProcessState.values()) { 74 | if (state.getCode() == code) { 75 | return state; 76 | } 77 | } 78 | return CREATED; 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /hraven-etl/src/main/java/com/twitter/hraven/mapreduce/ProcessingCounter.java: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2012 Twitter, Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package com.twitter.hraven.mapreduce; 17 | 18 | public enum ProcessingCounter { 19 | 20 | /** 21 | * Indicating how many raw records (jobs) could not be processed successfully. 
   */
  RAW_ROW_ERROR_COUNT,

  /**
   * Indicating how many raw records (jobs) were processed successfully.
   */
  RAW_ROW_SUCCESS_COUNT;

}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.twitter.hraven.mapreduce;

/**
 * Record types are identifiers for each line of log in history files.
 * A record type appears as the first token in a single line of log.
 */
public enum RecordTypes {
  // Job-level, task-level, map/reduce attempt records, and the meta/version
  // header line, respectively.
  Job, Task, MapAttempt, ReduceAttempt, Meta
}
/*
Copyright 2012 Twitter, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.twitter.hraven.etl;

import static org.junit.Assert.assertEquals;

import org.apache.hadoop.fs.FileStatus;
import org.junit.Test;

import com.twitter.hraven.etl.FileStatusModificationComparator;

/**
 * Test the FileStatusModificationComparator
 */
public class TestFileStatusModificationTimeComparator {

  // Two statuses that differ only in modification time (13 vs. 17).
  private static final FileStatus fileStatus1 = new FileStatus(0, false, 0, 0,
      13, null);
  private static final FileStatus fileStatus2 = new FileStatus(0, false, 0, 0,
      17, null);

  /**
   * Confirms reflexivity, null-first ordering and ascending-time ordering.
   */
  @Test
  public void testCompare() {

    FileStatusModificationComparator fsModComp =
        new FileStatusModificationComparator();

    // Equal / reflexive cases.
    assertEquals(0, fsModComp.compare(fileStatus1, fileStatus1));
    assertEquals(0, fsModComp.compare(fileStatus2, fileStatus2));
    assertEquals(0, fsModComp.compare(null, null));

    // Smaller: null sorts first, then the earlier modification time.
    assertEquals(-1, fsModComp.compare(null, fileStatus1));
    assertEquals(-1, fsModComp.compare(null, fileStatus2));
    assertEquals(-1, fsModComp.compare(fileStatus1, fileStatus2));

    // Bigger
    assertEquals(1, fsModComp.compare(fileStatus1, null));
    assertEquals(1, fsModComp.compare(fileStatus2, null));
    assertEquals(1, fsModComp.compare(fileStatus2, fileStatus1));

    // (Removed leftover debug arithmetic and System.out.println that were
    // unrelated to this test.)
  }

}
*/
package com.twitter.hraven.etl;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import org.junit.Test;

import com.twitter.hraven.HadoopVersion;

/**
 * Test {@link JobHistoryFileParserFactory}
 *
 */
public class TestJobHistoryFileParserFactory {

  @Test
  public void testCreateJobHistoryFileParserCorrectCreation() {

    String jHist2 = "Avro-Json\n" + "{\"type\":\"record\",\"name\":\"Event\", "
        + "\"namespace\":\"org.apache.hadoop.mapreduce.jobhistory\"," +
        "\"fields\":[]\"";
    JobHistoryFileParser historyFileParser = JobHistoryFileParserFactory
        .createJobHistoryFileParser(jHist2.getBytes(), null);

    assertNotNull(historyFileParser);

    /*
     * confirm that we get back an object that can parse hadoop 2.0 files
     * (the "Avro-Json" signature marks a post MAPREDUCE-1016 history file)
     */
    assertTrue(historyFileParser instanceof JobHistoryFileParserHadoop2);

  }

  /**
   * check the version in history files in hadoop 2
   */
  @Test
  public void testGetVersion() {
    String jHist2 = "Avro-Json\n"
        + "{\"type\":\"record\",\"name\":\"Event\", "
        + "\"namespace\":\"org.apache.hadoop.mapreduce.jobhistory\",\"fields\":[]\"";
    HadoopVersion version2 = JobHistoryFileParserFactory.getVersion(jHist2.getBytes());
    // confirm that we get back hadoop 2.0 version
    assertEquals(JobHistoryFileParserFactory.getHistoryFileVersion2(), version2);
  }

  /**
   * confirm that exception is thrown on incorrect input
   * (a corrupted hadoop2-style signature)
   */
  @Test(expected = IllegalArgumentException.class)
  public void testGetVersionIncorrect2() {
    String jHist2 =
        "Avro-HELLO-Json\n" + "{\"type\":\"record\",\"name\":\"Event\", "
            + "\"namespace\":\"org.apache.hadoop.mapreduce.jobhistory\",\"fields\":[]\"";
    JobHistoryFileParserFactory.getVersion(jHist2.getBytes());
  }

  /**
   * confirm that exception is thrown on incorrect input
   * (a corrupted hadoop1-style Meta line)
   */
  @Test(expected = IllegalArgumentException.class)
  public void testGetVersionIncorrect1() {
    String jHist1 = "Meta HELLO VERSION=\"1\" .\n" + "Job JOBID=\"job_201301010000_12345\"";
    JobHistoryFileParserFactory.getVersion(jHist1.getBytes());
  }

  /**
   * confirm that exception is thrown on null input
   */
  @Test(expected = IllegalArgumentException.class)
  public void testCreateJobHistoryFileParserNullCreation() {
    JobHistoryFileParser historyFileParser = JobHistoryFileParserFactory
        .createJobHistoryFileParser(null, null);
    assertNull(historyFileParser);
  }
}
/**
 * Test {@link ProcessRecord}, and specifically the key construction and
 * deconstruction.
 */
public class TestProcessRecord {

  // Fixture values for one process record; CLUSTER contains '@' -- presumably
  // to exercise separator handling in the row key, TODO confirm.
  private static final String CLUSTER = "cluster@identifier";
  private static final ProcessState PROCESS_STATE = ProcessState.CREATED;
  private static final long MIN_MODIFICATION_TIME_MILLIS = 1336115621494L;
  private static final long MAX_MODIFICATION_TIME_MILLIS = 1336115732505L;
  private static final int PROCESSED_JOB_FILES = 7;
  private static final String PROCESSING_DIRECTORY = "/hadoop/mapred/history/processing/20120503061229";

  /**
   * Build one record via the short constructor and a second via the long
   * constructor fed from the first record's getters, then confirm both agree
   * field-for-field (including the derived key) and match the fixture values.
   * The long constructor is passed null min/max job ids, so those getters
   * must return null.
   */
  @Test
  public void testConstructors() {
    ProcessRecord processRecord = new ProcessRecord(CLUSTER,
        MIN_MODIFICATION_TIME_MILLIS, MAX_MODIFICATION_TIME_MILLIS,
        PROCESSED_JOB_FILES, PROCESSING_DIRECTORY);
    ProcessRecord processRecord2 = new ProcessRecord(
        processRecord.getCluster(), PROCESS_STATE,
        processRecord.getMinModificationTimeMillis(),
        processRecord.getMaxModificationTimeMillis(),
        processRecord.getProcessedJobFiles(),
        processRecord.getProcessFile(), null, null);

    // Round-trip equality: both records must expose identical state.
    assertEquals(processRecord.getKey(), processRecord2.getKey());
    assertEquals(processRecord.getCluster(), processRecord2.getCluster());
    assertEquals(processRecord.getMaxModificationTimeMillis(),
        processRecord2.getMaxModificationTimeMillis());
    assertEquals(processRecord.getMinModificationTimeMillis(),
        processRecord2.getMinModificationTimeMillis());
    assertEquals(processRecord.getProcessedJobFiles(),
        processRecord2.getProcessedJobFiles());
    assertEquals(processRecord.getProcessFile(),
        processRecord2.getProcessFile());
    assertEquals(processRecord.getMinJobId(),
        processRecord2.getMinJobId());
    assertEquals(processRecord.getMaxJobId(),
        processRecord2.getMaxJobId());

    // Also pin the second record against the raw fixture constants.
    assertEquals(CLUSTER, processRecord2.getCluster());
    assertEquals(MAX_MODIFICATION_TIME_MILLIS,
        processRecord2.getMaxModificationTimeMillis());
    assertEquals(MIN_MODIFICATION_TIME_MILLIS,
        processRecord2.getMinModificationTimeMillis());
    assertEquals(PROCESSED_JOB_FILES, processRecord2.getProcessedJobFiles());
    assertEquals(PROCESSING_DIRECTORY, processRecord2.getProcessFile());
    assertNull(processRecord2.getMinJobId());

    // TODO: Add a minJobId and maxJobId value test

  }

}