├── LICENSE ├── README.md ├── diagram1.png ├── diagram2.png ├── pom.xml └── src └── main ├── java └── org │ └── bitsofinfo │ ├── ec2 │ └── Ec2Util.java │ └── s3 │ ├── S3BucketLoader.java │ ├── S3Util.java │ ├── cmd │ ├── CmdResult.java │ ├── CommandExecutor.java │ └── TocPathOpResult.java │ ├── control │ ├── CCMode.java │ ├── CCPayload.java │ ├── CCPayloadHandler.java │ ├── CCPayloadType.java │ └── ControlChannel.java │ ├── master │ ├── Master.java │ ├── ShutdownInfo.java │ ├── TOCGenerationEventHandler.java │ ├── TOCGeneratorAndSender.java │ ├── TOCQueueEmptier.java │ ├── TocInfoQueueSender.java │ ├── TocInfoSizeAwareQueue.java │ ├── WorkerInfo.java │ └── WorkerRegistry.java │ ├── toc │ ├── DirectoryCrawler.java │ ├── FileCopyTOCPayloadHandler.java │ ├── S3BucketObjectLister.java │ ├── S3KeyCopyingTOCPayloadHandler.java │ ├── SourceTOCGenerator.java │ ├── TOCManifestBasedGenerator.java │ ├── TOCPayload.java │ ├── TOCPayloadHandler.java │ ├── TOCPayloadValidator.java │ ├── TOCQueue.java │ ├── TocInfo.java │ └── ValidatingTOCPayloadHandler.java │ ├── util │ └── CompressUtil.java │ ├── worker │ ├── ErrorReport.java │ ├── ResultSummary.java │ ├── Worker.java │ ├── WorkerState.java │ ├── WriteBackoffMonitor.java │ ├── WriteErrorMonitor.java │ ├── WriteMonitor.java │ └── WriteMonitorError.java │ └── yas3fs │ └── Yas3fsS3UploadMonitor.java └── resources ├── ec2-init-s3BucketLoader.sample.py ├── log4j.properties └── s3BucketLoader.sample.properties /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | s3-bucket-loader 2 | ================ 3 | 4 | This project originated out of a need to quickly import (and backup) a massive amount of files (hundreds of gigabytes) into an AWS S3 bucket, 5 | with the ultimate intent that this bucket be managed going forward via the S3 distributed file-system; 6 | [yas3fs](https://github.com/danilop/yas3fs). Initial attempts at doing this a traditional way, 7 | (i.e. rsyncing or copying from source to destination) quickly became impractical due to the sheer 8 | amount of time that single-threaded, and even limited multi-threaded copiers would take. 9 | 10 | s3-bucket-loader leverages a simple master/worker paradigm to get economies of scale for copying many files from sourceA to targetB. 11 | "sourceA" and "targetB" could be two S3 buckets, or a file-system to S3 bucket (via an S3 file-system abstraction like yas3fs or s3fs etc). 
12 | Even though this is coded with S3 being the ultimate destination it could be used for other targets as well including other shared file-systems. 13 | The speed at which you can import a given file-set into S3 (through yas3fs in this case) is only limited by how much money you 14 | want to spend on worker hardware. For example this has been used to import and validate in S3 over 35k files (11gb total) 15 | in roughly 16 minutes; using 40 ec2 t2.medium instances as workers. In another scenario it was used to import and validate 16 | over 800k files totaling roughly 600gb in under 8 hours. This program has also been used to copy the previously imported 17 | buckets to secondary 'backup' buckets in under an hour. 18 | 19 | 20 | ![Alt text](/diagram1.png "Diagram1") 21 | 22 | ![Alt text](/diagram2.png "Diagram2") 23 | 24 | ## How it works 25 | 26 | This is a multi-threaded Java program that can be launched in two modes `master` or `worker`. The `master` is 27 | responsible for determining a table of contents (TOC) (i.e. file paths) which are candidates for WRITE to the 28 | destination and subsequently VALIDATED. The `master` node streams these TOC events over an SQS queue which is 29 | consumed by one or more `workers`. Each `worker` must also have access to the `source` from which the TOC 30 | was generated. The `source` data could be the same physical set of files, an S3 bucket, a copy of them or whatever... it really 31 | does not matter, but they just need to be accessible from each `worker` (i.e. via a SAN/NAS/NFS share, source S3 bucket etc). 32 | The `worker` then copies each item (in the case of files via rsync (or cp) to S3 via an S3 FS abstraction) or via an S3 key-copy. 33 | It uses rsync to preserve uid/gid information which is important for the ultimate consumer; and ensures preservation 34 | if written to S3 via S3 file-system abstractions like [yas3fs](https://github.com/danilop/yas3fs). 
35 | It is also important to note that each `worker` leverages N threads to increase parallelism and maximize the 36 | throughput to S3. The more `workers` you have the faster it goes. 37 | 38 | Please see [s3BucketLoader.sample.properties](https://github.com/bitsofinfo/s3-bucket-loader/blob/master/src/main/resources/s3BucketLoader.sample.properties) for 39 | more details on configuration options and how-to-use etc 40 | 41 | ## Flow overview 42 | 43 | 1. End user starts the Master which creates the SNS control-channel and SQS TOC queue 44 | 45 | 2. The Master (optionally) launches N worker nodes on EC2 46 | 47 | 3. As each worker node initializes it subscribes to the control-channel and publishes that it is INITIALIZED 48 | 49 | 4. Once the master sees all of its workers in INITIALIZED state, the master changes the state to WRITE 50 | 51 | 5. The master begins creating the TOC (consisting of path, isDirectory and size), and sends an SQS message for each file to the TOC queue. Again the 'source' for these 52 | TOC entries could be a path realized via the file-system, or a file-like key name in a source S3 bucket. 53 | 54 | 6. Workers begin consuming TOC messages off the queue and execute their TOCPayloadHandler, which might do an S3 key-copy or 55 | rsyncs (or cp) from the source -> destination through an S3 file-system abstraction. As workers are consuming they periodically 56 | send CURRENT SUMMARY updates to the master. If `failfast` is configured and any failures are detected the master can 57 | switch the cluster to ERROR_REPORT mode immediately (see below). Depending on the handler, they can also do chowns, chmods etc. 58 | 59 | 7. When workers are complete, they publish their WRITE SUMMARY and go into an IDLE state 60 | 61 | 8. 
Master receives all WRITE SUMMARYs from the workers 62 | * If no errors, the master transitions to the VALIDATE state, and sends the TOC to the queue again 63 | * If errors the master transitions to the ERROR_REPORT state, and requests error details from the workers 64 | 65 | 9. In VALIDATE state, all workers consume TOC file paths from the SQS queue and attempt to verify the file exists 66 | and its size matches the expected TOC size (locally and/or s3 object meta-data calls). When complete they go into IDLE state and publish their VALIDATE SUMMARY 67 | 68 | 10. After receiving all VALIDATE SUMMARYs from the workers 69 | * If no errors, the master issues a shutdown command to all workers, then optionally terminates all instances 70 | * If errors the master transitions to the ERROR_REPORT state, and requests error details from the workers 71 | 72 | 11. In ERROR REPORT state, workers summarize and publish their errors from either state WRITE/VALIDATE, 73 | the master aggregates them and reports them to the master log file for analysis. All workers are then shut down. 74 | 75 | 12. At any stage, issuing a control-C on the master triggers a shutdown of the entire cluster, 76 | including ec2 worker termination if configured in the properties file 77 | 78 | 79 | ## How to run 80 | 81 | * Clone this repository 82 | 83 | * You need a Java JDK installed, preferably 1.6+ 84 | 85 | * You need [Maven](http://maven.apache.org/) installed 86 | 87 | * Change dir to the root of the project and run 'mvn package' (this will build a runnable Jar under target/) 88 | 89 | * Copy the [s3BucketLoader.sample.properties](https://github.com/bitsofinfo/s3-bucket-loader/blob/master/src/main/resources/s3BucketLoader.sample.properties) 90 | file under src/main/resources, make your own and customize it. 91 | 92 | * run the below to launch, 1st on the MASTER, and then on the WORKERS (which the Master can do itself...) 
93 | ``` 94 | java -jar -DisMaster=true|false -Ds3BucketLoaderHome=/some/dir -DconfigFilePath=s3BucketLoader.properties s3-bucket-loader-0.0.1-SNAPSHOT.jar 95 | ``` 96 | 97 | * The sample properties should be fairly self-explanatory. It's important to understand that it is up 98 | to YOU to properly configure your environment for both the master and worker(s). The `master` needs access to the 99 | gold-copy "source" files that you want to get into S3. The `workers` need access to both the "source" files and 100 | some sort of S3 target (via an S3 file-system abstraction like yas3fs). Note that s3-bucket-loader can automatically 101 | configure your workers for you... you just need to configure a 'user-data' startup script for the EC2 instances 102 | that your `master` will launch. An example/sample one that I have used previously is provided under 103 | [ec2-init-s3BucketLoader.sample.py](src/main/resources/ec2-init-s3BucketLoader.sample.py). For example, when ec2 launches your 104 | workers, a startup script can pull all packages needed to prepare the environment from another S3 bucket, install things, 105 | configure and even pull down the latest s3-bucket-loader jar file, the worker properties file and finally launch the worker. 106 | 107 | Enjoy. 
108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /diagram1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitsofinfo/s3-bucket-loader/68f876198084665b74a744a37875a4be00c397e1/diagram1.png -------------------------------------------------------------------------------- /diagram2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitsofinfo/s3-bucket-loader/68f876198084665b74a744a37875a4be00c397e1/diagram2.png -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | org.bitsofinfo 4 | s3-bucket-loader 5 | 0.0.1-SNAPSHOT 6 | 7 | 8 | 9 | 10 | local_repo 11 | 12 | true 13 | ignore 14 | 15 | 16 | false 17 | 18 | file://${project.basedir}/local_repo 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | org.apache.maven.plugins 28 | maven-compiler-plugin 29 | 30 | UTF-8 31 | 1.6 32 | 1.6 33 | 34 | **/*.java 35 | **/*.xml 36 | **/*.txt 37 | **/*.yml 38 | 39 | 40 | 41 | 42 | 43 | org.apache.maven.plugins 44 | maven-jar-plugin 45 | 2.3.2 46 | 47 | 48 | 49 | true 50 | 51 | 52 | 53 | 54 | 55 | 56 | org.apache.maven.plugins 57 | maven-shade-plugin 58 | 1.6 59 | 60 | true 61 | 62 | 63 | *:* 64 | 65 | META-INF/*.SF 66 | META-INF/*.DSA 67 | META-INF/*.RSA 68 | 69 | 70 | 71 | 72 | 73 | 74 | package 75 | 76 | shade 77 | 78 | 79 | 80 | 82 | META-INF/spring.handlers 83 | 84 | 86 | META-INF/spring.schemas 87 | 88 | 90 | 92 | org.bitsofinfo.s3.S3BucketLoader 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | com.google.guava 106 | guava 107 | 18.0 108 | 109 | 110 | 111 | commons-dbcp 112 | commons-dbcp 113 | 1.4 114 | 115 | 116 | 117 | 118 | com.amazonaws 119 | aws-java-sdk 120 | 1.8.10.2 121 | 122 | 123 | 124 | log4j 125 | log4j 126 | 1.2.16 127 
| 128 | 129 | 130 | com.fasterxml.jackson.core 131 | jackson-databind 132 | 2.2.0 133 | 134 | 135 | 136 | 137 | stax 138 | stax-api 139 | 1.0.1 140 | 141 | 142 | 143 | stax 144 | stax 145 | 1.2.0 146 | 147 | 148 | 149 | org.springframework 150 | spring-context 151 | 3.1.4.RELEASE 152 | 153 | 154 | 155 | org.springframework 156 | spring-orm 157 | 3.1.4.RELEASE 158 | 159 | 160 | 161 | joda-time 162 | joda-time 163 | 2.3 164 | 165 | 166 | 167 | javax.mail 168 | mail 169 | 1.4.7 170 | 171 | 172 | 173 | org.freemarker 174 | freemarker 175 | 2.3.20 176 | 177 | 178 | 179 | org.aspectj 180 | aspectjrt 181 | 1.6.12 182 | 183 | 184 | 185 | commons-io 186 | commons-io 187 | 2.4 188 | 189 | 190 | 191 | org.springframework 192 | spring-core 193 | 3.1.4.RELEASE 194 | 195 | 196 | 197 | org.apache.httpcomponents 198 | httpclient 199 | 4.3.3 200 | 201 | 202 | 203 | 204 | commons-codec 205 | commons-codec 206 | 1.9 207 | 208 | 209 | 210 | org.apache.commons 211 | commons-exec 212 | 1.2 213 | 214 | 215 | 216 | com.google.code.gson 217 | gson 218 | 2.2.4 219 | 220 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/ec2/Ec2Util.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.ec2; 2 | 3 | import java.io.File; 4 | import java.io.FileInputStream; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | import java.util.Arrays; 8 | import java.util.Collection; 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.Properties; 12 | import java.util.TreeMap; 13 | 14 | import org.apache.log4j.Logger; 15 | 16 | import com.amazonaws.services.ec2.AmazonEC2Client; 17 | import com.amazonaws.services.ec2.model.BlockDeviceMapping; 18 | import com.amazonaws.services.ec2.model.DescribeInstanceStatusRequest; 19 | import com.amazonaws.services.ec2.model.DescribeInstanceStatusResult; 20 | import 
com.amazonaws.services.ec2.model.EbsBlockDevice; 21 | import com.amazonaws.services.ec2.model.Instance; 22 | import com.amazonaws.services.ec2.model.InstanceStatus; 23 | import com.amazonaws.services.ec2.model.Reservation; 24 | import com.amazonaws.services.ec2.model.RunInstancesRequest; 25 | import com.amazonaws.services.ec2.model.RunInstancesResult; 26 | import com.amazonaws.services.ec2.model.ShutdownBehavior; 27 | import com.amazonaws.services.ec2.model.StartInstancesRequest; 28 | import com.amazonaws.services.ec2.model.StopInstancesRequest; 29 | import com.amazonaws.services.ec2.model.TerminateInstancesRequest; 30 | import com.amazonaws.services.ec2.model.VolumeType; 31 | import com.amazonaws.util.Base64; 32 | 33 | public class Ec2Util { 34 | 35 | private static final Logger logger = Logger.getLogger(Ec2Util.class); 36 | 37 | /** 38 | * Returns map of instanceId:privateDnsName 39 | * 40 | * @param ec2Instances 41 | * @return 42 | */ 43 | public Map getPrivateDNSNames(List ec2Instances) { 44 | TreeMap names = new TreeMap(String.CASE_INSENSITIVE_ORDER); 45 | for (Instance i : ec2Instances) { 46 | names.put(i.getInstanceId(),i.getPrivateDnsName().toLowerCase()); 47 | } 48 | return names; 49 | } 50 | 51 | /** 52 | * Returns map of instanceId:privateIp 53 | * 54 | * @param ec2Instances 55 | * @return 56 | */ 57 | public Map getPrivateIPs(List ec2Instances) { 58 | TreeMap names = new TreeMap(String.CASE_INSENSITIVE_ORDER); 59 | for (Instance i : ec2Instances) { 60 | names.put(i.getInstanceId(),i.getPrivateIpAddress()); 61 | } 62 | return names; 63 | } 64 | 65 | public void startInstance(AmazonEC2Client ec2Client, String instanceId) throws Exception { 66 | StartInstancesRequest startReq = new StartInstancesRequest(); 67 | List instanceIds = new ArrayList(); 68 | instanceIds.add(instanceId); 69 | startReq.setInstanceIds(instanceIds); 70 | logger.debug("Starting EC2 instance...." 
+ Arrays.toString(instanceIds.toArray(new String[]{}))); 71 | ec2Client.startInstances(startReq); 72 | } 73 | 74 | public void stopInstance(AmazonEC2Client ec2Client, String instanceId) throws Exception { 75 | StopInstancesRequest stopReq = new StopInstancesRequest(); 76 | List instanceIds = new ArrayList(); 77 | instanceIds.add(instanceId); 78 | stopReq.setInstanceIds(instanceIds); 79 | logger.debug("Stopping EC2 instance...." + Arrays.toString(instanceIds.toArray(new String[]{}))); 80 | ec2Client.stopInstances(stopReq); 81 | } 82 | 83 | public void terminateEc2Instance(AmazonEC2Client ec2Client, String instanceId) throws Exception { 84 | try { 85 | TerminateInstancesRequest termReq = new TerminateInstancesRequest(); 86 | List instanceIds = new ArrayList(); 87 | instanceIds.add(instanceId); 88 | termReq.setInstanceIds(instanceIds); 89 | logger.debug("Terminating EC2 instances...." + Arrays.toString(instanceIds.toArray(new String[]{}))); 90 | ec2Client.terminateInstances(termReq); 91 | 92 | } catch(Exception e) { 93 | logger.error("Unexpected error terminating: " + instanceId + " "+ e.getMessage(),e); 94 | } 95 | } 96 | 97 | 98 | public List launchEc2Instances(AmazonEC2Client ec2Client, Properties props) throws Exception { 99 | 100 | Integer totalExpectedWorkers = Integer.valueOf(props.getProperty("master.workers.total")); 101 | 102 | // disk size 103 | Collection blockDevices = new ArrayList(); 104 | blockDevices.add( 105 | new BlockDeviceMapping() 106 | .withDeviceName(props.getProperty("master.workers.ec2.disk.deviceName")) 107 | .withEbs(new EbsBlockDevice() 108 | .withVolumeType(VolumeType.valueOf(props.getProperty("master.workers.ec2.disk.volumeType"))) 109 | .withDeleteOnTermination(true) 110 | .withVolumeSize(Integer.valueOf(props.getProperty("master.workers.ec2.disk.size.gigabytes"))))); 111 | 112 | // create our run request for the total workers we expect 113 | RunInstancesRequest runInstancesRequest = new RunInstancesRequest(); 114 | 
runInstancesRequest.withImageId(props.getProperty("master.workers.ec2.ami.id")) 115 | .withInstanceType(props.getProperty("master.workers.ec2.instanceType")) 116 | .withMinCount(totalExpectedWorkers) 117 | .withMaxCount(totalExpectedWorkers) 118 | .withBlockDeviceMappings(blockDevices) 119 | .withKeyName(props.getProperty("master.workers.ec2.keyName")) 120 | .withSecurityGroupIds(props.getProperty("master.workers.ec2.securityGroupId")) 121 | .withInstanceInitiatedShutdownBehavior(ShutdownBehavior.valueOf(props.getProperty("master.workers.ec2.shutdownBehavior"))) 122 | .withSubnetId(props.getProperty("master.workers.ec2.subnetId")) 123 | .withUserData(Base64.encodeAsString(readFile(props.getProperty("master.workers.ec2.userDataFile")).getBytes())); 124 | 125 | // launch 126 | logger.debug("Launching " + totalExpectedWorkers + " EC2 instances, " + 127 | "it may take few minutes for workers to come up...: \n" + 128 | "\tamiId:" + runInstancesRequest.getImageId() +"\n"+ 129 | "\tsecGrpId:" + runInstancesRequest.getSecurityGroupIds().get(0) +"\n"+ 130 | "\tsubnetId:" + runInstancesRequest.getSubnetId() +"\n"+ 131 | "\tinstanceType:" + runInstancesRequest.getInstanceType() +"\n"+ 132 | "\tshutdownBehavior:" + runInstancesRequest.getInstanceInitiatedShutdownBehavior() +"\n"+ 133 | "\tkeyName:" + runInstancesRequest.getKeyName() 134 | ); 135 | 136 | 137 | // as the instances come up, assuming the "userData" above launches the worker we will be good 138 | // they will auto register w/ us the master 139 | RunInstancesResult result = ec2Client.runInstances(runInstancesRequest); 140 | Reservation reservation = result.getReservation(); 141 | return reservation.getInstances(); 142 | } 143 | 144 | public InstanceStatus getInstanceStatus(AmazonEC2Client ec2Client, String instanceId) { 145 | List instanceIds = new ArrayList(); 146 | instanceIds.add(instanceId); 147 | DescribeInstanceStatusRequest statusReq = new DescribeInstanceStatusRequest(); 148 | 
statusReq.setInstanceIds(instanceIds); 149 | DescribeInstanceStatusResult result = ec2Client.describeInstanceStatus(statusReq); 150 | List statuses = result.getInstanceStatuses(); 151 | if (statuses == null || statuses.size() == 0) { 152 | return null; 153 | } 154 | return statuses.iterator().next(); 155 | } 156 | 157 | public List dumpEc2InstanceStatus(AmazonEC2Client ec2Client, List ec2Instances) { 158 | try { 159 | List instanceIds = new ArrayList(); 160 | 161 | for (Instance ec2node : ec2Instances) { 162 | instanceIds.add(ec2node.getInstanceId()); 163 | } 164 | 165 | DescribeInstanceStatusRequest statusReq = new DescribeInstanceStatusRequest(); 166 | statusReq.setInstanceIds(instanceIds); 167 | DescribeInstanceStatusResult result = ec2Client.describeInstanceStatus(statusReq); 168 | 169 | List statuses = result.getInstanceStatuses(); 170 | 171 | List impairedInstances = new ArrayList(); 172 | 173 | StringBuffer sb = new StringBuffer("EC2 worker instance STATUS:\n"); 174 | for (InstanceStatus status : statuses) { 175 | sb.append("\tid:"+status.getInstanceId() + 176 | "\taz:" + status.getAvailabilityZone() + 177 | "\tstate:" + status.getInstanceState().getName() + 178 | "\tstatus:" + status.getInstanceStatus().getStatus() + 179 | "\tsystem_status: " + status.getSystemStatus().getStatus() + "\n"); 180 | 181 | if (status.getInstanceStatus().getStatus().equalsIgnoreCase("impaired")) { 182 | impairedInstances.add(status.getInstanceId()); 183 | } 184 | } 185 | 186 | logger.info(sb.toString()+"\n"); 187 | 188 | return impairedInstances; 189 | 190 | } catch(Exception e) { 191 | logger.error("Error getting instance state: " + e.getMessage(),e); 192 | return null; 193 | } 194 | 195 | } 196 | 197 | public static String readFile(String path) throws IOException { 198 | File file = new File(path); 199 | FileInputStream fis = new FileInputStream(file); 200 | byte[] data = new byte[(int)file.length()]; 201 | fis.read(data); 202 | fis.close(); 203 | return new String(data, 
"UTF-8"); 204 | } 205 | 206 | } 207 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/S3BucketLoader.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3; 2 | 3 | import java.io.FileInputStream; 4 | import java.io.InputStream; 5 | import java.nio.charset.Charset; 6 | import java.util.Properties; 7 | 8 | import org.apache.log4j.Logger; 9 | import org.bitsofinfo.s3.master.Master; 10 | import org.bitsofinfo.s3.worker.Worker; 11 | 12 | public class S3BucketLoader { 13 | 14 | private static final Logger logger = Logger.getLogger(S3BucketLoader.class); 15 | 16 | private static Master master = null; 17 | private static Worker worker = null; 18 | 19 | public static void main(String[] args) throws Exception { 20 | 21 | try { 22 | 23 | Properties props = new Properties(); 24 | String confPath = System.getProperty("configFilePath"); 25 | 26 | logger.info("System file.encoding: " + System.getProperty("file.encoding")); 27 | logger.info("System charset: " + Charset.defaultCharset().name()); 28 | 29 | InputStream input = null; 30 | try { 31 | logger.info("Attempting to load props from: " + confPath); 32 | input = new FileInputStream(confPath); 33 | props.load(input); 34 | } catch(Exception e) { 35 | e.printStackTrace(); 36 | throw e; 37 | } 38 | 39 | boolean isMaster = Boolean.valueOf(System.getProperty("isMaster")); 40 | 41 | Runtime.getRuntime().addShutdownHook(new Thread(new S3BucketLoader().new ShutdownHook())); 42 | 43 | if (isMaster) { 44 | execAsMaster(props); 45 | } else { 46 | execAsWorker(props); 47 | } 48 | 49 | // run until we are shutdown.... 
50 | while(true) { 51 | Thread.currentThread().sleep(60000); 52 | } 53 | 54 | } catch(Exception e) { 55 | logger.error("main() unexpected error: " + e.getMessage(),e); 56 | } 57 | 58 | 59 | } 60 | 61 | public class ShutdownHook implements Runnable { 62 | public void run() { 63 | try { 64 | logger.debug("ShutdownHook invoked..."); 65 | if (worker != null) {worker.destroy();} 66 | if (master != null) {master.destroy();} 67 | } catch(Exception ignore){} 68 | } 69 | } 70 | 71 | 72 | private static void execAsMaster(Properties props) throws Exception { 73 | master = new Master(props); 74 | master.start(); 75 | } 76 | 77 | public static void execAsWorker(Properties props) { 78 | worker = new Worker(props); 79 | worker.startConsuming(); 80 | } 81 | 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/S3Util.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3; 2 | 3 | import java.io.File; 4 | import java.util.List; 5 | 6 | import org.apache.log4j.Logger; 7 | 8 | import com.amazonaws.services.s3.AmazonS3Client; 9 | import com.amazonaws.services.s3.model.ObjectMetadata; 10 | import com.amazonaws.services.s3.model.PutObjectRequest; 11 | import com.amazonaws.services.s3.model.StorageClass; 12 | 13 | public class S3Util { 14 | 15 | private static final Logger logger = Logger.getLogger(S3Util.class); 16 | 17 | public void uploadToS3(AmazonS3Client s3Client, 18 | String bucketName, 19 | String s3LogBucketFolderRoot, 20 | String host, 21 | List filePathsToUpload) { 22 | 23 | try { 24 | 25 | for (String file : filePathsToUpload) { 26 | 27 | String key = null; 28 | try { 29 | File item = new File(file.trim()); 30 | 31 | if (!item.exists()) { 32 | logger.error("uploadToS3() cannot upload item, does not exist! 
" + item.getAbsolutePath()); 33 | continue; 34 | } 35 | 36 | // default to the one file 37 | File[] allFiles = new File[]{item}; 38 | 39 | if (item.isDirectory()) { 40 | allFiles = item.listFiles(); 41 | } 42 | 43 | for (File toUpload : allFiles) { 44 | 45 | if (!toUpload.exists() || toUpload.getName().startsWith(".") || toUpload.isDirectory()) { 46 | logger.error("uploadToS3() cannot upload, does not exist, starts w/ . or is a directory: " + toUpload.getAbsolutePath()); 47 | continue; 48 | } 49 | 50 | key = s3LogBucketFolderRoot + "/" + host + "/" + toUpload.getName(); 51 | 52 | PutObjectRequest req = new PutObjectRequest(bucketName, key, toUpload); 53 | req.setStorageClass(StorageClass.ReducedRedundancy); 54 | ObjectMetadata objectMetadata = new ObjectMetadata(); 55 | objectMetadata.setContentType("text/plain"); 56 | objectMetadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); 57 | req.setMetadata(objectMetadata); 58 | 59 | s3Client.putObject(req); 60 | } 61 | 62 | } catch(Exception e) { 63 | logger.error("uploadToS3() unexpected error uploading logs to: " +bucketName + " key:"+ key + " for " +file); 64 | } 65 | 66 | } 67 | 68 | 69 | } catch(Exception e) { 70 | logger.error("uploadToS3() error uploading logs to S3: " + e.getMessage(),e); 71 | } 72 | 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/cmd/CmdResult.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.cmd; 2 | 3 | public class CmdResult { 4 | 5 | private int exitCode; 6 | private String stdOut; 7 | private String stdErr; 8 | 9 | public CmdResult(int exitCode, String stdOut, String stdErr) { 10 | super(); 11 | this.exitCode = exitCode; 12 | this.stdOut = stdOut; 13 | this.stdErr = stdErr; 14 | } 15 | 16 | public int getExitCode() { 17 | return exitCode; 18 | } 19 | public void setExitCode(int exitCode) { 20 | this.exitCode = exitCode; 
	}
	public String getStdOut() {
		return stdOut;
	}
	public void setStdOut(String stdOut) {
		this.stdOut = stdOut;
	}
	public String getStdErr() {
		return stdErr;
	}
	public void setStdErr(String stdErr) {
		this.stdErr = stdErr;
	}

}

// ===== FILE: src/main/java/org/bitsofinfo/s3/cmd/CommandExecutor.java =====
package org.bitsofinfo.s3.cmd;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;

import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.DefaultExecutor;
import org.apache.commons.exec.ExecuteStreamHandler;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

/**
 * Runs a command line via commons-exec, retrying up to maxAttempts times and
 * capturing the stdout/stderr of each attempt.
 */
public class CommandExecutor {

	private static final Logger logger = Logger.getLogger(CommandExecutor.class);

	/**
	 * Executes cmdLine, retrying until it exits 0 or maxAttempts is reached.
	 * Returns the result of the last attempt; an attempt that threw is recorded
	 * with the synthetic exit code 9999 and the exception text in stdErr.
	 */
	public CmdResult execute(CommandLine cmdLine, int maxAttempts) {

		CmdResult lastCmdResult = null;

		int attempts = 0;
		while(attempts < maxAttempts) {

			attempts++;

			// fresh capture buffers per attempt
			final StringWriter stdOut = new StringWriter();
			final StringWriter stdErr = new StringWriter();

			try {

				DefaultExecutor executor = new DefaultExecutor();
				// copy both process streams into the local writers as they arrive
				executor.setStreamHandler(new ExecuteStreamHandler() {
					public void setProcessOutputStream(InputStream is) throws IOException {IOUtils.copy(is, stdOut, "UTF-8");}
					public void setProcessErrorStream(InputStream is) throws IOException {IOUtils.copy(is, stdErr, "UTF-8");}
					public void stop() throws IOException {}
					public void start() throws IOException {}
					public void setProcessInputStream(OutputStream os) throws IOException {}
				});

				logger.trace("Executing: attempt:" + attempts + " " +
						cmdLine.toString());

				// NOTE(review): DefaultExecutor.execute() throws ExecuteException for
				// non-zero exit values unless setExitValues() is relaxed, so this
				// branch may be unreachable (non-zero exits would land in the catch
				// below) — confirm against the commons-exec version in use.
				int exitValue = executor.execute(cmdLine);
				if (exitValue > 0) {
					logger.error("ERROR: attempt #: " + attempts+ " exitCode: "+exitValue+" cmd=" + cmdLine.toString());
				}

				//System.out.println("STDOUT:"+stdOut);
				//System.out.println("STDERR:"+stdErr);

				lastCmdResult = new CmdResult(exitValue,stdOut.toString(),stdErr.toString());

				// if successful exit loop immediately...
				if (exitValue == 0) {
					logger.trace("SUCCESS! exitCode = 0: " + cmdLine.toString());
					break;
				}

			} catch(Exception e) {
				// failed attempt: record a synthetic result (exit code 9999) and retry
				logger.error("execute() attempt #: " + attempts+ " cmd:"+cmdLine.toString() + " exception:"+e.getMessage(),e);
				lastCmdResult = new CmdResult(9999, stdOut.toString(), "attempt #: " + attempts+ " exception: " + e.getMessage() + " stdErr: " + stdErr.toString());
			}
		}

		return lastCmdResult;
	}


}

// ===== FILE: src/main/java/org/bitsofinfo/s3/cmd/TocPathOpResult.java =====
package org.bitsofinfo.s3.cmd;

import org.bitsofinfo.s3.toc.TOCPayload;

// Outcome of one TOC path operation (identified by `operation`) performed
// against `filePath` while in the given TOCPayload.MODE.
public class TocPathOpResult {

	public boolean success;
	public String filePath;
	public String operation;
	public String message;
	public TOCPayload.MODE mode;


	public TocPathOpResult(TOCPayload.MODE mode, boolean success, String filePath, String operation, String message) {
		super();
		this.success = success;
		this.filePath = filePath;
		this.operation = operation;
		this.message = message;
		this.mode = mode;
	}

	public boolean isSuccess() {
		return success;
	}
	public void setSuccess(boolean success) {
		this.success = success;
	}
	public String getFilePath() {
		return filePath;
	}
	public void setFilePath(String filePath) {
this.filePath = filePath; 34 | } 35 | public String getOperation() { 36 | return operation; 37 | } 38 | public void setOperation(String operation) { 39 | this.operation = operation; 40 | } 41 | public String getMessage() { 42 | return message; 43 | } 44 | public void setMessage(String message) { 45 | this.message = message; 46 | } 47 | 48 | public TOCPayload.MODE getMode() { 49 | return mode; 50 | } 51 | 52 | public void setMode(TOCPayload.MODE mode) { 53 | this.mode = mode; 54 | } 55 | 56 | 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/control/CCMode.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.control; 2 | 3 | public enum CCMode { 4 | 5 | INITIALIZED, 6 | PREPARE, 7 | WRITE, 8 | IDLE, 9 | VALIDATE, 10 | REPORT_ERRORS 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/control/CCPayload.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.control; 2 | 3 | public class CCPayload { 4 | 5 | public boolean fromMaster = false; 6 | public String sourceHostId = null; 7 | public String sourceHostIP = null; 8 | public String onlyForHostIdOrIP = null; 9 | public CCPayloadType type = null; 10 | public Object value = null; 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/control/CCPayloadHandler.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.control; 2 | 3 | public interface CCPayloadHandler { 4 | 5 | public void handlePayload(CCPayload payload) throws Exception; 6 | 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/control/CCPayloadType.java: 
package org.bitsofinfo.s3.control;

/**
 * Discriminators for control-channel payloads. Declaration order is kept as-is
 * since it is part of the enum's externally visible behavior.
 */
public enum CCPayloadType {

	MASTER_CURRENT_MODE,                  // current mode of the master
	WORKER_CURRENT_MODE,                  // current mode of workers

	WORKER_WRITES_CURRENT_SUMMARY,        // periodic WRITE-mode progress: successful/failed so far
	WORKER_VALIDATIONS_CURRENT_SUMMARY,   // periodic VALIDATE-mode progress: successful/failed so far

	WORKER_WRITES_FINISHED_SUMMARY,       // sent by workers when idle: total WRITE mode messages processed
	WORKER_VALIDATIONS_FINISHED_SUMMARY,  // sent by workers when idle: total VALIDATE mode messages processed

	WORKER_ERROR_REPORT_DETAILS,          // sent by workers when REPORT_ERRORS mode is switched on

	CMD_WORKER_SHUTDOWN                   // sent by master to tell a worker to shut down

}

// ===== FILE: src/main/java/org/bitsofinfo/s3/control/ControlChannel.java =====
package org.bitsofinfo.s3.control;

import java.net.InetAddress;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import org.apache.log4j.Logger;

import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.policy.Policy;
import com.amazonaws.auth.policy.Principal;
import com.amazonaws.auth.policy.Resource;
import com.amazonaws.auth.policy.Statement;
import com.amazonaws.auth.policy.Statement.Effect;
import com.amazonaws.auth.policy.actions.SQSActions;
import com.amazonaws.auth.policy.conditions.ConditionFactory;
import com.amazonaws.services.sns.AmazonSNSClient;
import com.amazonaws.services.sns.model.CreateTopicResult;
import
com.amazonaws.services.sns.model.ListTopicsResult; 23 | import com.amazonaws.services.sns.model.SubscribeResult; 24 | import com.amazonaws.services.sns.model.Topic; 25 | import com.amazonaws.services.sqs.AmazonSQSClient; 26 | import com.amazonaws.services.sqs.model.CreateQueueResult; 27 | import com.amazonaws.services.sqs.model.Message; 28 | import com.amazonaws.services.sqs.model.ReceiveMessageRequest; 29 | import com.amazonaws.services.sqs.model.ReceiveMessageResult; 30 | import com.amazonaws.services.sqs.model.SetQueueAttributesRequest; 31 | import com.google.gson.Gson; 32 | import com.google.gson.reflect.TypeToken; 33 | 34 | public class ControlChannel implements Runnable { 35 | 36 | private static final Logger logger = Logger.getLogger(ControlChannel.class); 37 | 38 | private AmazonSNSClient snsClient = null; 39 | private AmazonSQSClient sqsClient = null; 40 | 41 | private String snsTopicARN = null; 42 | private String snsSubscriptionARN = null; 43 | private String sqsQueueUrl = null; 44 | private String sqsQueueARN = null; 45 | 46 | private Gson gson = new Gson(); 47 | 48 | private CCPayloadHandler ccPayloadHandler = null; 49 | private String mySourceIdentifier = null; 50 | private String mySourceIp = null; 51 | private Thread consumerThread = null; 52 | 53 | private boolean canDestroyTopic = false; 54 | 55 | private boolean running = true; 56 | private String snsControlTopicName = null; 57 | 58 | private String uuid = UUID.randomUUID().toString().replace("-", "").substring(0,4);; 59 | 60 | public ControlChannel(boolean callerIsMaster, 61 | String awsAccessKey, String awsSecretKey, String snsControlTopicName, 62 | String userAccountPrincipalId, 63 | String userARN, 64 | CCPayloadHandler ccPayloadHandler) throws Exception { 65 | super(); 66 | 67 | try { 68 | this.mySourceIp = InetAddress.getLocalHost().getHostAddress(); 69 | } catch(Exception e) { 70 | logger.error("Error getting local inet address: " + e.getMessage()); 71 | } 72 | 73 | mySourceIdentifier = 
determineHostName() + "-" +uuid; 74 | this.snsControlTopicName = snsControlTopicName; 75 | 76 | if (callerIsMaster) { 77 | canDestroyTopic = true; 78 | this.snsControlTopicName += "-" + mySourceIdentifier; 79 | } 80 | 81 | this.ccPayloadHandler = ccPayloadHandler; 82 | 83 | sqsClient = new AmazonSQSClient(new BasicAWSCredentials(awsAccessKey, awsSecretKey)); 84 | snsClient = new AmazonSNSClient(new BasicAWSCredentials(awsAccessKey, awsSecretKey)); 85 | 86 | 87 | this.connectToTopic(callerIsMaster, 1000, userAccountPrincipalId, userARN); 88 | 89 | 90 | 91 | } 92 | 93 | 94 | public void connectToTopic(boolean callerIsMaster, int maxAttempts, String userAccountPrincipalId, String userARN) throws Exception { 95 | 96 | 97 | // try up to max attempts to connect to pre-existing topic 98 | for (int i=0; i topics = listResult.getTopics(); 104 | 105 | while(topics != null) { 106 | 107 | for (Topic topic : topics) { 108 | 109 | // note we do index of match.... 110 | if (topic.getTopicArn().indexOf(snsControlTopicName) != -1) { 111 | snsTopicARN = topic.getTopicArn(); 112 | logger.info("Found existing SNS topic by name: "+snsControlTopicName + " @ " + snsTopicARN); 113 | break; 114 | } 115 | } 116 | 117 | String nextToken = listResult.getNextToken(); 118 | 119 | if (nextToken != null && snsTopicARN == null) { 120 | listResult = snsClient.listTopics(nextToken); 121 | topics = listResult.getTopics(); 122 | 123 | } else { 124 | break; 125 | } 126 | } 127 | 128 | // if consumer, retry, otherwise is master, so just exit quick to create... 129 | if (snsTopicARN == null && !callerIsMaster) { 130 | Thread.currentThread().sleep(1000); 131 | continue; 132 | } else { 133 | break; // exit; 134 | } 135 | } 136 | 137 | 138 | 139 | // if master only he can create... 140 | if (snsTopicARN == null && callerIsMaster) { 141 | this.snsControlTopicName = this.snsControlTopicName.substring(0,(snsControlTopicName.length() > 80 ? 
80 : this.snsControlTopicName.length())); 142 | 143 | logger.info("Attempting to create new SNS control channel topic by name: "+this.snsControlTopicName); 144 | 145 | CreateTopicResult createTopicResult = snsClient.createTopic(this.snsControlTopicName); 146 | snsTopicARN = createTopicResult.getTopicArn(); 147 | snsClient.addPermission(snsTopicARN, "Permit_SNSAdd", 148 | Arrays.asList(new String[]{userARN}), 149 | Arrays.asList(new String[]{"Publish","Subscribe","Receive"})); 150 | logger.info("Created new SNS control channel topic by name: "+this.snsControlTopicName + " @ " + snsTopicARN); 151 | 152 | } else if (snsTopicARN == null) { 153 | throw new Exception("Worker() cannot start, snsControlTopicName has yet to be created by master?: " + this.snsControlTopicName); 154 | } 155 | 156 | // http://www.jorgjanke.com/2013/01/aws-sns-topic-subscriptions-with-sqs.html 157 | 158 | // create SQS queue to get SNS notifications (max 80 len) 159 | String prefix = ("s3bktLoaderCC_" + mySourceIdentifier); 160 | String sqsQueueName = prefix.substring(0,(prefix.length() > 80 ? 
80 : prefix.length())); 161 | 162 | CreateQueueResult createQueueResult = sqsClient.createQueue(sqsQueueName); 163 | this.sqsQueueUrl = createQueueResult.getQueueUrl(); 164 | this.sqsQueueARN = sqsClient.getQueueAttributes(sqsQueueUrl, Arrays.asList(new String[]{"QueueArn"})).getAttributes().get("QueueArn"); 165 | 166 | Statement statement = new Statement(Effect.Allow) 167 | .withActions(SQSActions.SendMessage) 168 | .withPrincipals(new Principal("*")) 169 | .withConditions(ConditionFactory.newSourceArnCondition(snsTopicARN)) 170 | .withResources(new Resource(sqsQueueARN)); 171 | Policy policy = new Policy("SubscriptionPermission").withStatements(statement); 172 | 173 | HashMap attributes = new HashMap(); 174 | attributes.put("Policy", policy.toJson()); 175 | SetQueueAttributesRequest request = new SetQueueAttributesRequest(sqsQueueUrl, attributes); 176 | sqsClient.setQueueAttributes(request); 177 | 178 | logger.info("Created SQS queue: " + sqsQueueARN + " @ " + sqsQueueUrl); 179 | 180 | // subscribe our SQS queue to the SNS:s3MountTest topic 181 | SubscribeResult subscribeResult = snsClient.subscribe(snsTopicARN,"sqs",sqsQueueARN); 182 | snsSubscriptionARN = subscribeResult.getSubscriptionArn(); 183 | logger.info("Subscribed for messages from SNS control channel:" + snsTopicARN + " ----> SQS: "+sqsQueueARN); 184 | logger.info("Subscription ARN: " + snsSubscriptionARN); 185 | 186 | this.consumerThread = new Thread(this,"ControlChannel msg consumer thread"); 187 | this.consumerThread.start(); 188 | 189 | logger.info("\n-------------------------------------------\n" + 190 | "CONTROL CHANNEL: ALL SNS/SQS resources hooked up OK\n" + 191 | "-------------------------------------------\n"); 192 | } 193 | 194 | 195 | public void send(boolean fromMaster, CCPayloadType type, Object value) throws Exception { 196 | this.send(fromMaster,type,null,value); 197 | } 198 | 199 | public void send(boolean fromMaster, CCPayloadType type, String onlyForHostOrIp, Object value) throws 
Exception { 200 | CCPayload payload = new CCPayload(); 201 | payload.fromMaster = fromMaster; 202 | payload.type = type; 203 | payload.value = value; 204 | payload.onlyForHostIdOrIP = onlyForHostOrIp; 205 | payload.sourceHostId = this.mySourceIdentifier.trim(); 206 | payload.sourceHostIP = this.mySourceIp; 207 | 208 | logger.debug("Sending: " + type + "="+value); 209 | 210 | // send! 211 | this.snsClient.publish(this.snsTopicARN, gson.toJson(payload)); 212 | } 213 | 214 | public void run() { 215 | 216 | while(running) { 217 | 218 | try { 219 | 220 | ReceiveMessageRequest req = new ReceiveMessageRequest(); 221 | req.setWaitTimeSeconds(10); 222 | req.setMaxNumberOfMessages(10); 223 | req.setVisibilityTimeout(300); 224 | req.setQueueUrl(sqsQueueUrl); 225 | 226 | ReceiveMessageResult msgResult = sqsClient.receiveMessage(req); 227 | List messages = msgResult.getMessages(); 228 | 229 | for (Message msg : messages) { 230 | 231 | CCPayload payload = null; 232 | 233 | try { 234 | Map body = gson.fromJson(msg.getBody(), new TypeToken>(){}.getType()); 235 | payload = gson.fromJson(body.get("Message"), CCPayload.class); 236 | 237 | } catch(Exception e) { 238 | logger.error("ERROR: unexpected error converting SQS Message " + 239 | "body (json -> CCPayload) body= " + msg.getBody() + " error="+e.getMessage()); 240 | 241 | // delete the message we just analyzed 242 | sqsClient.deleteMessage(sqsQueueUrl, msg.getReceiptHandle()); 243 | 244 | continue; 245 | } 246 | 247 | // if NOT equal to THIS MACHINE... 
defer tp payload handler 248 | if (!payload.sourceHostId.equalsIgnoreCase(mySourceIdentifier)) { 249 | this.ccPayloadHandler.handlePayload(payload); 250 | } 251 | 252 | // delete the message we just analyzed 253 | sqsClient.deleteMessage(sqsQueueUrl, msg.getReceiptHandle()); 254 | } 255 | 256 | } catch(Exception e) { 257 | logger.error("run() " + e.getMessage(),e); 258 | } 259 | 260 | try { 261 | Thread.currentThread().sleep(500); 262 | } catch(Exception ignore) {} 263 | 264 | } 265 | } 266 | 267 | 268 | 269 | public void destroy() throws Exception { 270 | 271 | this.running = false; 272 | 273 | Thread.currentThread().sleep(30000); 274 | 275 | if (snsClient != null) { 276 | try { 277 | logger.debug("destroy() unsubscribe " + this.snsSubscriptionARN); 278 | snsClient.unsubscribe(snsSubscriptionARN); 279 | } catch(Exception e) { 280 | logger.debug("destroy() error: " + e.getMessage()); 281 | } 282 | } 283 | 284 | if (sqsClient != null) { 285 | try { 286 | logger.debug("destroy() " + this.sqsQueueUrl); 287 | sqsClient.deleteQueue(sqsQueueUrl); 288 | } catch(Exception e) { 289 | logger.debug("destroy() error: " + e.getMessage()); 290 | } 291 | } 292 | 293 | if (canDestroyTopic && snsClient != null) { 294 | try { 295 | logger.debug("destroy() " + this.snsTopicARN); 296 | snsClient.deleteTopic(this.snsTopicARN); 297 | } catch(Exception e) { 298 | logger.debug("destroy() error: " + e.getMessage()); 299 | } 300 | } 301 | 302 | } 303 | 304 | public AmazonSNSClient getSnsClient() { 305 | return snsClient; 306 | } 307 | 308 | public void setSnsClient(AmazonSNSClient snsClient) { 309 | this.snsClient = snsClient; 310 | } 311 | 312 | private static String determineHostName() throws Exception { 313 | 314 | InetAddress addr = InetAddress.getLocalHost(); 315 | 316 | // Get IP Address 317 | byte[] ipAddr = addr.getAddress(); 318 | // Get sourceHost 319 | String tmpHost = addr.getHostName(); 320 | 321 | // we only care about the HOST portion, strip everything else 322 | // as some 
boxes report a fully qualified sourceHost such as 323 | // host.domainname.com 324 | 325 | int firstDot = tmpHost.indexOf('.'); 326 | if (firstDot != -1) { 327 | tmpHost = tmpHost.substring(0,firstDot); 328 | } 329 | return tmpHost; 330 | 331 | } 332 | 333 | public static Logger getLogger() { 334 | return logger; 335 | } 336 | 337 | public AmazonSQSClient getSqsClient() { 338 | return sqsClient; 339 | } 340 | 341 | public String getSnsTopicARN() { 342 | return snsTopicARN; 343 | } 344 | 345 | public String getSnsSubscriptionARN() { 346 | return snsSubscriptionARN; 347 | } 348 | 349 | public String getSqsQueueUrl() { 350 | return sqsQueueUrl; 351 | } 352 | 353 | public String getSqsQueueARN() { 354 | return sqsQueueARN; 355 | } 356 | 357 | public Gson getGson() { 358 | return gson; 359 | } 360 | 361 | public CCPayloadHandler getCcPayloadHandler() { 362 | return ccPayloadHandler; 363 | } 364 | 365 | public String getMySourceIdentifier() { 366 | return mySourceIdentifier; 367 | } 368 | 369 | public Thread getConsumerThread() { 370 | return consumerThread; 371 | } 372 | 373 | 374 | public String getMySourceIp() { 375 | return mySourceIp; 376 | } 377 | 378 | 379 | } 380 | 381 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/master/ShutdownInfo.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.master; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | public class ShutdownInfo { 7 | 8 | public String s3LogBucketName = null; 9 | public String s3LogBucketFolderRoot = null; 10 | public List workerLogFilesToUpload = new ArrayList(); 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/master/TOCGenerationEventHandler.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.master; 2 | 3 | 
import java.util.Collection; 4 | 5 | import org.bitsofinfo.s3.toc.TocInfo; 6 | 7 | public interface TOCGenerationEventHandler { 8 | 9 | public void tocGenerationComplete(Collection generatedTOC); 10 | 11 | public void tocGenerationError(String msg, Exception exception); 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/master/TOCGeneratorAndSender.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.master; 2 | 3 | import java.util.Collection; 4 | import java.util.Queue; 5 | 6 | import org.apache.log4j.Logger; 7 | import org.bitsofinfo.s3.toc.SourceTOCGenerator; 8 | import org.bitsofinfo.s3.toc.TOCPayload.MODE; 9 | import org.bitsofinfo.s3.toc.TOCQueue; 10 | import org.bitsofinfo.s3.toc.TocInfo; 11 | 12 | public class TOCGeneratorAndSender implements Runnable { 13 | 14 | private static final Logger logger = Logger.getLogger(TOCGeneratorAndSender.class); 15 | 16 | private Thread myThread = new Thread(this); 17 | private TocInfoQueueSender tocFileInfoQueueSender = null; 18 | private Queue tocFileInfoQueue = null; 19 | private SourceTOCGenerator tocGenerator = null; 20 | private MODE mode = null; 21 | private TOCGenerationEventHandler handler = null; 22 | private Collection toc = null; 23 | 24 | public TOCGeneratorAndSender(MODE mode, 25 | TOCGenerationEventHandler handler, 26 | TOCQueue tocQueue, 27 | int tocDispatchThreadsTotal, 28 | Collection toc) { 29 | 30 | this.handler = handler; 31 | this.mode = mode; 32 | this.toc = toc; 33 | 34 | // populate the queue that the "sender" will concurrently consume 35 | // from while the TOC is being generated 36 | this.tocFileInfoQueue = new TocInfoSizeAwareQueue(100000000); 37 | this.tocFileInfoQueue.addAll(this.toc); 38 | 39 | this.tocFileInfoQueueSender = new TocInfoQueueSender(mode, tocQueue, tocDispatchThreadsTotal, this.tocFileInfoQueue); 40 | 41 | } 42 | 43 | public 
TOCGeneratorAndSender(MODE mode, 44 | TOCGenerationEventHandler handler, 45 | TOCQueue tocQueue, 46 | int tocDispatchThreadsTotal, 47 | SourceTOCGenerator tocGenerator) { 48 | 49 | this.tocGenerator = tocGenerator; 50 | this.handler = handler; 51 | this.mode = mode; 52 | 53 | // generate a queue that the "sender" will concurrently consume 54 | // from while the TOC is being generated 55 | this.tocFileInfoQueue = new TocInfoSizeAwareQueue(100000000); 56 | 57 | this.tocFileInfoQueueSender = new TocInfoQueueSender(mode, tocQueue, tocDispatchThreadsTotal, this.tocFileInfoQueue); 58 | 59 | } 60 | 61 | public void generateAndSendTOC() { 62 | logger.debug("Thread started..."); 63 | myThread.start(); 64 | } 65 | 66 | public void destroy() { 67 | logger.debug("Destroy..."); 68 | 69 | try { 70 | myThread.interrupt(); 71 | } catch(Exception ignore){} 72 | 73 | try { 74 | this.tocFileInfoQueueSender.destroy(); 75 | } catch(Exception ignore){} 76 | 77 | 78 | } 79 | 80 | private Collection getTOC() throws Exception { 81 | if (this.toc == null) { 82 | this.toc = tocGenerator.generateTOC(tocFileInfoQueue); 83 | } 84 | 85 | return toc; 86 | } 87 | 88 | public void run() { 89 | try { 90 | // generate and get all TOC messages (write live to the queue we just created) 91 | tocFileInfoQueueSender.start(); // start the consumer 92 | 93 | logger.info("run("+mode+") generating TOC..."); 94 | Collection toc = getTOC(); 95 | 96 | // set on handler 97 | handler.tocGenerationComplete(toc); 98 | 99 | // while the queue is not empty, sleep.... 100 | while (tocFileInfoQueue.size() > 0) { 101 | logger.debug("TOC generation complete, waiting for TOCFileInfoQueueSender" + 102 | " thread to complete sending to SQS.. size:" + tocFileInfoQueue.size()); 103 | Thread.currentThread().sleep(10000); 104 | } 105 | 106 | // queue is empty.. stop it (i.e. 
tocFileInfoQueueSender is done consuming all from it) 107 | tocFileInfoQueueSender.destroy(); 108 | 109 | logger.info("TOCGeneratorAndSender(MODE="+mode+") done sending " + toc.size() + " tocPaths over TOCQueue...."); 110 | 111 | } catch(InterruptedException e) { 112 | logger.warn("Caught InterruptedException, stopping TOC generation!"); 113 | 114 | } catch(Exception e) { 115 | logger.error("Error generating TOC: " + e.getMessage(),e); 116 | handler.tocGenerationError("Error generating TOC: " + e.getMessage(), e); 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/master/TOCQueueEmptier.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.master; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Random; 6 | 7 | import org.apache.log4j.Logger; 8 | import org.bitsofinfo.s3.toc.TOCQueue; 9 | 10 | public class TOCQueueEmptier implements Runnable { 11 | 12 | 13 | private static final Logger logger = Logger.getLogger(TOCQueueEmptier.class); 14 | 15 | private TOCQueue tocQueue = null; 16 | private boolean running = true; 17 | private List threads = new ArrayList(); 18 | 19 | public TOCQueueEmptier(TOCQueue tocQueue, int totalThreads) { 20 | this.tocQueue= tocQueue; 21 | 22 | for (int i=0; i toConsumeFrom = null; 20 | private boolean running = true; 21 | private List threads = new ArrayList(); 22 | private MODE mode = null; 23 | 24 | public TocInfoQueueSender(MODE mode, TOCQueue tocQueue, int totalThreads, Queue toConsumeFrom) { 25 | this.toConsumeFrom = toConsumeFrom; 26 | this.tocQueue= tocQueue; 27 | this.mode = mode; 28 | 29 | for (int i=0; i { 20 | 21 | private long largeFileMinSizeBytes = 0; 22 | private Queue tocQueue = new ConcurrentLinkedQueue(); 23 | private Queue largeFileTocQueue = new ConcurrentLinkedQueue(); 24 | private long largeFileLastPolled = 
System.currentTimeMillis(); 25 | private long ensureLargeFilePolledMinMS = 1000; 26 | 27 | public TocInfoSizeAwareQueue(long largeFileMinSizeBytes) { 28 | this.largeFileMinSizeBytes = largeFileMinSizeBytes; 29 | } 30 | 31 | 32 | @Override 33 | public TocInfo poll() { 34 | 35 | long now = System.currentTimeMillis(); 36 | 37 | // if tocQueue empty go right to large file one 38 | if (tocQueue.isEmpty()) { 39 | largeFileLastPolled = now; 40 | return largeFileTocQueue.poll(); 41 | 42 | // if largeFileTocQueue is NOT empty, and its been longer than our min large file send time 43 | // poll from the large files 44 | } else if (!largeFileTocQueue.isEmpty() && (now - largeFileLastPolled) > ensureLargeFilePolledMinMS) { 45 | largeFileLastPolled = now; 46 | return largeFileTocQueue.poll(); 47 | } 48 | 49 | // otherwise just poll from tocQueue 50 | return tocQueue.poll(); 51 | } 52 | 53 | 54 | @Override 55 | public boolean addAll(Collection c) { 56 | for (TocInfo ti : c) { 57 | if (!ti.isDirectory() && ti.getSize() > this.largeFileMinSizeBytes) { 58 | largeFileTocQueue.add(ti); 59 | } else { 60 | tocQueue.add(ti); 61 | } 62 | } 63 | return true; 64 | } 65 | 66 | 67 | @Override 68 | public int size() { 69 | return (tocQueue.size() + largeFileTocQueue.size()); 70 | } 71 | 72 | 73 | @Override 74 | public boolean add(TocInfo ti) { 75 | if (!ti.isDirectory() && ti.getSize() > this.largeFileMinSizeBytes) { 76 | return largeFileTocQueue.add(ti); 77 | } else { 78 | return tocQueue.add(ti); 79 | } 80 | } 81 | 82 | @Override 83 | public boolean isEmpty() { 84 | return (tocQueue.isEmpty() && largeFileTocQueue.isEmpty()); 85 | } 86 | 87 | @Override 88 | public void clear() { 89 | tocQueue.clear(); 90 | largeFileTocQueue.clear(); 91 | } 92 | 93 | @Override 94 | public boolean contains(Object o) { 95 | return tocQueue.contains(o) || largeFileTocQueue.contains(o); 96 | } 97 | 98 | @Override 99 | public boolean containsAll(Collection c) { 100 | throw new UnsupportedOperationException(); 101 | 
} 102 | 103 | 104 | @Override 105 | public Iterator iterator() { 106 | throw new UnsupportedOperationException(); 107 | } 108 | 109 | @Override 110 | public boolean remove(Object o) { 111 | throw new UnsupportedOperationException(); 112 | } 113 | 114 | @Override 115 | public boolean removeAll(Collection c) { 116 | throw new UnsupportedOperationException(); 117 | } 118 | 119 | @Override 120 | public boolean retainAll(Collection c) { 121 | throw new UnsupportedOperationException(); 122 | } 123 | 124 | @Override 125 | public Object[] toArray() { 126 | throw new UnsupportedOperationException(); 127 | } 128 | 129 | @Override 130 | public T[] toArray(T[] a) { 131 | throw new UnsupportedOperationException(); 132 | } 133 | 134 | 135 | @Override 136 | public TocInfo element() { 137 | throw new UnsupportedOperationException(); 138 | } 139 | 140 | @Override 141 | public boolean offer(TocInfo e) { 142 | throw new UnsupportedOperationException(); 143 | } 144 | 145 | @Override 146 | public TocInfo peek() { 147 | throw new UnsupportedOperationException(); 148 | } 149 | 150 | @Override 151 | public TocInfo remove() { 152 | throw new UnsupportedOperationException(); 153 | } 154 | 155 | 156 | } 157 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/master/WorkerInfo.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.master; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Date; 5 | import java.util.HashMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Stack; 9 | 10 | import org.bitsofinfo.s3.control.CCMode; 11 | import org.bitsofinfo.s3.control.CCPayload; 12 | import org.bitsofinfo.s3.control.CCPayloadType; 13 | import org.bitsofinfo.s3.worker.ResultSummary; 14 | 15 | import com.google.gson.Gson; 16 | 17 | public class WorkerInfo { 18 | 19 | private String hostId = null; 20 | private CCMode currentMode = null; 21 | 
private String ip = null; 22 | private int totalWritten = 0; 23 | private int totalValidated = 0; 24 | private int totalWriteFailures = 0; 25 | private int totalValidateFailures = 0; 26 | private int totalWriteMonitorErrors = 0; 27 | private int totalPostWriteLocalValidateErrors = 0; 28 | 29 | private Map payloadsReceived = new HashMap(); 30 | private List orderedPayloadsReceived = new ArrayList(); 31 | 32 | // by type to stack of most recent of that payload type received 33 | private Map> payloadType2LifoStack = new HashMap>(); 34 | 35 | private Gson gson = new Gson(); 36 | 37 | public WorkerInfo(String hostId, String ip) { 38 | super(); 39 | this.hostId = hostId; 40 | this.ip = ip; 41 | } 42 | 43 | public String getHostId() { 44 | return hostId; 45 | } 46 | 47 | public String getIP() { 48 | return ip; 49 | } 50 | 51 | public int getTotalWritten() { 52 | return totalWritten; 53 | } 54 | public int getTotalValidated() { 55 | return totalValidated; 56 | } 57 | public int getTotalWriteMonitorErrors() { 58 | return totalWriteMonitorErrors; 59 | } 60 | 61 | public CCMode getCurrentMode() { 62 | return this.currentMode; 63 | } 64 | 65 | public void setCurrentMode(CCMode mode) { 66 | this.currentMode = mode; 67 | } 68 | 69 | public synchronized void addPayloadReceived(CCPayload payload) { 70 | 71 | if (!payload.sourceHostId.trim().equalsIgnoreCase(this.hostId)) { 72 | throw new RuntimeException("cannot add payload received for host other than what this " + 73 | "WorkInfo is configured for! 
me:"+this.hostId + " payload:"+payload.sourceHostId); 74 | } 75 | this.payloadsReceived.put(new Date(), payload); 76 | this.orderedPayloadsReceived.add(payload); 77 | 78 | // push onto stack 79 | Stack stack = this.payloadType2LifoStack.get(payload.type); 80 | if (stack == null) { 81 | stack = new Stack(); 82 | this.payloadType2LifoStack.put(payload.type, stack); 83 | } 84 | stack.push(payload); 85 | 86 | 87 | if (payload.type == CCPayloadType.WORKER_CURRENT_MODE) { 88 | this.currentMode = CCMode.valueOf(payload.value.toString()); 89 | } 90 | 91 | 92 | if (payload.type == CCPayloadType.WORKER_WRITES_FINISHED_SUMMARY) { 93 | ResultSummary writeSummary = gson.fromJson(payload.value.toString(), ResultSummary.class); 94 | this.totalWritten = writeSummary.total; 95 | this.totalWriteFailures = writeSummary.failed; 96 | this.totalWriteMonitorErrors = writeSummary.writeMonitorErrors; 97 | this.totalPostWriteLocalValidateErrors = writeSummary.postWriteLocalValidateErrors; 98 | } 99 | 100 | if (payload.type == CCPayloadType.WORKER_VALIDATIONS_FINISHED_SUMMARY) { 101 | ResultSummary validateSummary = gson.fromJson(payload.value.toString(), ResultSummary.class); 102 | this.totalValidated = validateSummary.total; 103 | this.totalValidateFailures = validateSummary.failed; 104 | this.totalWriteMonitorErrors = validateSummary.writeMonitorErrors; 105 | this.totalPostWriteLocalValidateErrors = validateSummary.postWriteLocalValidateErrors; 106 | } 107 | 108 | 109 | if (payload.type == CCPayloadType.WORKER_VALIDATIONS_CURRENT_SUMMARY) { 110 | ResultSummary validateSummary = gson.fromJson(payload.value.toString(), ResultSummary.class); 111 | this.totalValidated = validateSummary.total; 112 | this.totalValidateFailures = validateSummary.failed; 113 | this.totalWriteMonitorErrors = validateSummary.writeMonitorErrors; 114 | this.totalPostWriteLocalValidateErrors = validateSummary.postWriteLocalValidateErrors; 115 | } 116 | 117 | if (payload.type == 
CCPayloadType.WORKER_WRITES_CURRENT_SUMMARY) { 118 | ResultSummary writeSummary = gson.fromJson(payload.value.toString(), ResultSummary.class); 119 | this.totalWritten = writeSummary.total; 120 | this.totalWriteFailures = writeSummary.failed; 121 | this.totalWriteMonitorErrors = writeSummary.writeMonitorErrors; 122 | this.totalPostWriteLocalValidateErrors = writeSummary.postWriteLocalValidateErrors; 123 | } 124 | } 125 | 126 | public Map getPayloadsReceived() { 127 | return payloadsReceived; 128 | } 129 | 130 | public void setPayloadsReceived(Map payloadsReceived) { 131 | this.payloadsReceived = payloadsReceived; 132 | } 133 | 134 | public CCPayload getLastPayloadReceived() { 135 | if (orderedPayloadsReceived.size() > 0) { 136 | return orderedPayloadsReceived.get(orderedPayloadsReceived.size()-1); 137 | } 138 | return null; 139 | } 140 | 141 | 142 | public boolean writingCurrentSummaryReceived() { 143 | if (payloadReceived(CCPayloadType.WORKER_WRITES_CURRENT_SUMMARY)) { 144 | return true; 145 | } 146 | 147 | return false; 148 | } 149 | 150 | 151 | public boolean validationsCurrentSummaryReceived() { 152 | if (payloadReceived(CCPayloadType.WORKER_VALIDATIONS_CURRENT_SUMMARY)) { 153 | return true; 154 | } 155 | 156 | return false; 157 | } 158 | 159 | 160 | public boolean writingIsComplete() { 161 | if (payloadReceived(CCPayloadType.WORKER_WRITES_FINISHED_SUMMARY)) { 162 | return true; 163 | } 164 | 165 | return false; 166 | } 167 | 168 | public boolean writeSummaryHasFailures() { 169 | if (payloadReceived(CCPayloadType.WORKER_WRITES_FINISHED_SUMMARY)) { 170 | CCPayload payload = getMostRecentPayload(CCPayloadType.WORKER_WRITES_FINISHED_SUMMARY); 171 | 172 | if (payload == null) { 173 | return false; 174 | } 175 | 176 | ResultSummary writeSummary = gson.fromJson(payload.value.toString(), ResultSummary.class); 177 | if (writeSummary.failed > 0 || 178 | writeSummary.writeMonitorErrors > 0) { 179 | 180 | return true; 181 | } 182 | } 183 | 184 | return false; 185 | } 186 
| 187 | public boolean validationSummaryHasFailures() { 188 | if (payloadReceived(CCPayloadType.WORKER_VALIDATIONS_FINISHED_SUMMARY)) { 189 | CCPayload payload = getMostRecentPayload(CCPayloadType.WORKER_VALIDATIONS_FINISHED_SUMMARY); 190 | 191 | if (payload == null) { 192 | return false; 193 | } 194 | 195 | ResultSummary validationsSummary = gson.fromJson(payload.value.toString(), ResultSummary.class); 196 | if (validationsSummary.failed > 0) { 197 | return true; 198 | } 199 | } 200 | 201 | return false; 202 | } 203 | 204 | 205 | public boolean writeCurrentSummaryHasFailures() { 206 | if (payloadReceived(CCPayloadType.WORKER_WRITES_CURRENT_SUMMARY)) { 207 | CCPayload payload = getMostRecentPayload(CCPayloadType.WORKER_WRITES_CURRENT_SUMMARY); 208 | 209 | if (payload == null) { 210 | return false; 211 | } 212 | 213 | ResultSummary writeSummary = gson.fromJson(payload.value.toString(), ResultSummary.class); 214 | if (writeSummary.failed > 0 || 215 | writeSummary.writeMonitorErrors > 0) { 216 | return true; 217 | } 218 | } 219 | 220 | return false; 221 | } 222 | 223 | public boolean writeCurrentSummaryHasWriteMonitorErrors() { 224 | if (payloadReceived(CCPayloadType.WORKER_WRITES_CURRENT_SUMMARY)) { 225 | CCPayload payload = getMostRecentPayload(CCPayloadType.WORKER_WRITES_CURRENT_SUMMARY); 226 | 227 | if (payload == null) { 228 | return false; 229 | } 230 | 231 | ResultSummary writeSummary = gson.fromJson(payload.value.toString(), ResultSummary.class); 232 | if (writeSummary.writeMonitorErrors > 0) { 233 | return true; 234 | } 235 | } 236 | 237 | return false; 238 | } 239 | 240 | public boolean validationCurrentSummaryHasFailures() { 241 | if (payloadReceived(CCPayloadType.WORKER_VALIDATIONS_CURRENT_SUMMARY)) { 242 | CCPayload payload = getMostRecentPayload(CCPayloadType.WORKER_VALIDATIONS_CURRENT_SUMMARY); 243 | 244 | if (payload == null) { 245 | return false; 246 | } 247 | 248 | ResultSummary validationsSummary = gson.fromJson(payload.value.toString(), 
ResultSummary.class); 249 | if (validationsSummary.failed > 0) { 250 | return true; 251 | } 252 | } 253 | 254 | return false; 255 | } 256 | 257 | 258 | public boolean validationIsComplete() { 259 | if (payloadReceived(CCPayloadType.WORKER_VALIDATIONS_FINISHED_SUMMARY)) { 260 | return true; 261 | } 262 | 263 | return false; 264 | } 265 | 266 | public boolean errorReportIsReceived() { 267 | if (payloadReceived(CCPayloadType.WORKER_ERROR_REPORT_DETAILS)) { 268 | return true; 269 | } 270 | 271 | return false; 272 | } 273 | 274 | public boolean payloadReceived(CCPayloadType type) { 275 | return getMostRecentPayload(type) != null; 276 | } 277 | 278 | public CCPayload getMostRecentPayload(CCPayloadType type) { 279 | 280 | Stack typeStack = this.payloadType2LifoStack.get(type); 281 | if (typeStack != null && typeStack.size() > 0) { 282 | return typeStack.peek(); 283 | } 284 | 285 | return null; 286 | } 287 | 288 | public Object getPayloadValue(CCPayloadType type) { 289 | CCPayload payload = getMostRecentPayload(type); 290 | if (payload != null) { 291 | return payload.value; 292 | } 293 | return null; 294 | 295 | } 296 | 297 | public int getTotalPostWriteLocalValidateErrors() { 298 | return totalPostWriteLocalValidateErrors; 299 | } 300 | 301 | public int getTotalWriteFailures() { 302 | return totalWriteFailures; 303 | } 304 | 305 | public int getTotalValidateFailures() { 306 | return totalValidateFailures; 307 | } 308 | 309 | 310 | } 311 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/master/WorkerRegistry.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.master; 2 | 3 | import java.util.HashSet; 4 | import java.util.Map; 5 | import java.util.Set; 6 | import java.util.TreeMap; 7 | 8 | import org.apache.log4j.Logger; 9 | import org.bitsofinfo.s3.control.CCMode; 10 | import org.bitsofinfo.s3.control.CCPayload; 11 | 12 | public class 
WorkerRegistry {

    private static final Logger logger = Logger.getLogger(WorkerRegistry.class);

    // worker hostId (case-insensitive) -> its tracked state
    private Map<String, WorkerInfo> registry = new TreeMap<String, WorkerInfo>(String.CASE_INSENSITIVE_ORDER);

    /** Linear scan for the worker with the given IP; null if unknown. */
    public WorkerInfo getWorkerByIP(String ip) {
        for (WorkerInfo candidate : registry.values()) {
            if (candidate.getIP().equals(ip)) {
                return candidate;
            }
        }
        return null;
    }

    /** Registers the worker if not yet known; returns its WorkerInfo either way. */
    public WorkerInfo register(String workerHostId, String workerIP) {
        if (getWorkerInfo(workerHostId) == null) {
            String key = workerHostId.trim();
            registry.put(key, new WorkerInfo(key, workerIP));
            logger.trace("Registered worker: " + workerHostId + " ip:" + workerIP);
        }
        return getWorkerInfo(workerHostId);
    }

    /** Records a worker-originated payload, auto-registering the worker on first contact. */
    public void registerWorkerPayload(CCPayload payload) {
        if (payload.fromMaster) {
            throw new RuntimeException("Cannot call registerWorkerPaylod with one where fromMaster=true");
        }

        WorkerInfo workerInfo = getWorkerInfo(payload.sourceHostId);
        if (workerInfo == null) {
            workerInfo = register(payload.sourceHostId, payload.sourceHostIP);
        }

        workerInfo.addPayloadReceived(payload);
        logger.trace("Registered Worker[" + payload.sourceHostId + "] Payload: " + payload.type + " val:" + payload.value);
    }

    public Set<String> getWorkerHostnames() {
        return this.registry.keySet();
    }

    /** Sum of written totals across all workers. */
    public int getTotalWritten() {
        int sum = 0;
        for (WorkerInfo worker : registry.values()) {
            sum += worker.getTotalWritten();
        }
        return sum;
    }

    /** Sum of write failures across all workers. */
    public int getTotalWriteFailures() {
        int sum = 0;
        for (WorkerInfo worker : registry.values()) {
            sum += worker.getTotalWriteFailures();
        }
        return sum;
    }

    /** Sum of validate failures across all workers. */
    public int getTotalValidateFailures() {
        int sum = 0;
        for (WorkerInfo worker : registry.values()) {
            sum += worker.getTotalValidateFailures();
        }
        return sum;
    }

    /** Sum of write monitor errors across all workers. */
    public int getTotalWriteMonitorErrors() {
        int sum = 0;
        for (WorkerInfo worker : registry.values()) {
            sum += worker.getTotalWriteMonitorErrors();
        }
        return sum;
    }

    /** Sum of post-write local validate errors across all workers. */
    public int getTotalPostWriteLocalValidateErrors() {
        int sum = 0;
        for (WorkerInfo worker : registry.values()) {
            sum += worker.getTotalPostWriteLocalValidateErrors();
        }
        return sum;
    }

    public int size() {
        return registry.size();
    }

    /** Host ids of workers whose error report has not arrived yet. */
    public Set<String> getWorkersAwaitingErrorReport() {
        Set<String> awaiting = new HashSet<String>();
        for (WorkerInfo worker : registry.values()) {
            if (!worker.errorReportIsReceived()) {
                awaiting.add(worker.getHostId());
            }
        }
        return awaiting;
    }

    /** Host ids of workers whose final write summary has not arrived yet. */
    public Set<String> getWorkersAwaitingWriteReport() {
        Set<String> awaiting = new HashSet<String>();
        for (WorkerInfo worker : registry.values()) {
            if (!worker.writingIsComplete()) {
                awaiting.add(worker.getHostId());
            }
        }
        return awaiting;
    }

    /** Host ids of workers whose final validation summary has not arrived yet. */
    public Set<String> getWorkersAwaitingValidationReport() {
        Set<String> awaiting = new HashSet<String>();
        for (WorkerInfo worker : registry.values()) {
            if (!worker.validationIsComplete()) {
                awaiting.add(worker.getHostId());
            }
        }
        return awaiting;
    }

    /** Sum of validated totals across all workers. */
    public int getTotalValidated() {
        int sum = 0;
        for (WorkerInfo worker : registry.values()) {
            sum += worker.getTotalValidated();
        }
        return sum;
    }

    public WorkerInfo getWorkerInfo(String workerHostname) {
        return registry.get(workerHostname.trim());
    }

    public boolean allWorkerErrorReportsAreIn() {
        for (WorkerInfo worker : registry.values()) {
            if (!worker.errorReportIsReceived()) {
                return false;
            }
        }
        return true;
    }

    public boolean allWorkerWritesAreComplete() {
        for (WorkerInfo worker : registry.values()) {
            if (!worker.writingIsComplete()) {
                return false;
            }
        }
        return true;
    }

    public boolean anyWorkerWritesAreComplete() {
        for (WorkerInfo worker : registry.values()) {
            if (worker.writingIsComplete()) {
                return true;
            }
        }
        return false;
    }

    public boolean anyWorkerCurrentWriteSummariesReceived() {
        for (WorkerInfo worker : registry.values()) {
            if (worker.writingCurrentSummaryReceived()) {
                return true;
            }
        }
        return false;
    }

    public boolean anyWorkerErrorReportsAreReceived() {
        for (WorkerInfo worker : registry.values()) {
            if (worker.errorReportIsReceived()) {
                return true;
            }
        }
        return false;
    }

    public boolean anyWorkerValidatesAreComplete() {
        for (WorkerInfo worker : registry.values()) {
            if (worker.validationIsComplete()) {
                return true;
            }
        }
        return false;
    }

    public boolean anyWorkerCurrentValidationSummariesReceived() {
        for (WorkerInfo worker : registry.values()) {
            if (worker.validationsCurrentSummaryReceived()) {
                return true;
            }
        }
        return false;
    }

    public boolean anyWorkerWritesContainErrors() {
        for (WorkerInfo worker : registry.values()) {
            if (worker.writeSummaryHasFailures()) {
                return true;
            }
        }
        return false;
    }

    public boolean anyWorkerCurrentSummaryWritesContainErrors() {
        for (WorkerInfo worker : registry.values()) {
            if (worker.writeCurrentSummaryHasFailures()) {
                return true;
            }
        }
        return false;
    }

    public boolean anyWorkerCurrentSummaryWritesContainWriteMonitorErrors() {
        for (WorkerInfo worker : registry.values()) {
            if (worker.writeCurrentSummaryHasWriteMonitorErrors()) {
                return true;
            }
        }
        return false;
    }

    /** IPs of workers whose latest write summary shows write monitor errors. */
    public Set<String>
getWorkerHostIPsWithWriteMonitorErrors() { 249 | Set ips = new HashSet(); 250 | for (WorkerInfo wi : registry.values()) { 251 | if (wi.writeCurrentSummaryHasWriteMonitorErrors()) { 252 | ips.add(wi.getIP()); 253 | } 254 | } 255 | 256 | return ips; 257 | } 258 | 259 | public boolean anyWorkerValidationsContainErrors() { 260 | for (WorkerInfo wi : registry.values()) { 261 | if (wi.validationSummaryHasFailures()) { 262 | return true; 263 | } 264 | } 265 | 266 | return false; 267 | } 268 | 269 | public boolean anyWorkerCurrentValidationsContainErrors() { 270 | for (WorkerInfo wi : registry.values()) { 271 | if (wi.validationCurrentSummaryHasFailures()) { 272 | return true; 273 | } 274 | } 275 | 276 | return false; 277 | } 278 | 279 | public boolean allWorkerValidatesAreComplete() { 280 | for (WorkerInfo wi : registry.values()) { 281 | if (!wi.validationIsComplete()) { 282 | return false; 283 | } 284 | } 285 | 286 | return true; 287 | } 288 | 289 | public boolean allWorkersCurrentModeIs(CCMode mode) { 290 | for (WorkerInfo wi : registry.values()) { 291 | if (wi.getCurrentMode() != mode) { 292 | return false; 293 | } 294 | } 295 | 296 | return true; 297 | } 298 | 299 | } 300 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/DirectoryCrawler.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | import java.io.File; 4 | import java.util.HashSet; 5 | import java.util.Queue; 6 | import java.util.Set; 7 | 8 | import org.apache.log4j.Logger; 9 | 10 | 11 | /** 12 | * Returns all unique directory and file paths, also logs every 30s 13 | * the current number of items collected 14 | * 15 | * @author inter0p 16 | * 17 | */ 18 | public class DirectoryCrawler implements SourceTOCGenerator, Runnable { 19 | 20 | private static final Logger logger = Logger.getLogger(DirectoryCrawler.class); 21 | 22 | private File rootDir = null; 23 | private 
boolean running = true; 24 | private int tocInfosGenerated = 0; 25 | private long lastModifiedAtGreaterThanFilter = -1; 26 | 27 | public DirectoryCrawler() {} 28 | 29 | public DirectoryCrawler(File rootDir) { 30 | this.rootDir = rootDir; 31 | } 32 | 33 | public void setRootDir(File rootDir) { 34 | this.rootDir = rootDir; 35 | if (!rootDir.exists()) { 36 | throw new RuntimeException("DirectoryCrawler invalid rootDir: " + rootDir.getAbsolutePath()); 37 | } 38 | } 39 | 40 | public Set generateTOC(Queue tocQueue) throws Exception { 41 | Thread loggingThread = new Thread(this); 42 | 43 | Set toc = new HashSet(); 44 | loggingThread.start(); 45 | 46 | scanNode(this.rootDir,toc,tocQueue); 47 | 48 | this.running = false; // stop logging 49 | return toc; 50 | } 51 | 52 | public void run() { 53 | while (running) { 54 | try { 55 | logger.info("Generated TOC current size: " + tocInfosGenerated); 56 | Thread.currentThread().sleep(30000); 57 | 58 | } catch(Exception ignore){} 59 | } 60 | } 61 | 62 | private void scanNode(File node, Set toc, Queue tocQueue) throws Exception { 63 | 64 | try { 65 | 66 | if (node.exists() && !node.getName().startsWith(".")) { 67 | 68 | String adjustedPath = node.getAbsolutePath().replace(this.rootDir.getAbsolutePath(), ""); 69 | 70 | // skip root dir 71 | if (adjustedPath.trim().length() > 0) { 72 | 73 | if (node.isFile()) { 74 | if (this.lastModifiedAtGreaterThanFilter > 0) { 75 | if (node.lastModified() < this.lastModifiedAtGreaterThanFilter) { 76 | return; // do nothing, file is older than our filter... 77 | } 78 | } 79 | } 80 | 81 | TocInfo finfo = new TocInfo(adjustedPath, (node.isFile() ? 
node.length() : 0)); 82 | finfo.setIsDirectory(node.isDirectory()); 83 | toc.add(finfo); 84 | tocQueue.add(finfo); 85 | tocInfosGenerated++; // increment for logging 86 | } 87 | } 88 | 89 | if (node.exists() && !node.getName().startsWith(".") && node.isDirectory()) { 90 | for (File n : node.listFiles()) { 91 | scanNode(n,toc,tocQueue); 92 | } 93 | } 94 | 95 | } catch(Throwable e) { 96 | logger.error("Permission issue? scanNode(node:"+(node != null? (" path:"+node.getAbsolutePath() +" isDirectory():"+node.isDirectory() + " exists:"+node.exists()): "NULL") + 97 | " toc: " + (toc != null? toc.size(): "NULL") + 98 | " tocQueue:"+(tocQueue != null ? tocQueue.size() : " NULL")); 99 | 100 | throw new Exception("scanNode() " + e.getMessage(),e); 101 | } 102 | } 103 | 104 | public long getLastModifiedAtGreaterThanFilter() { 105 | return lastModifiedAtGreaterThanFilter; 106 | } 107 | 108 | public void setLastModifiedAtGreaterThanFilter(long lastModifiedAtGreaterThanFilter) { 109 | this.lastModifiedAtGreaterThanFilter = lastModifiedAtGreaterThanFilter; 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/FileCopyTOCPayloadHandler.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.Writer; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | import java.util.regex.Pattern; 10 | import java.util.regex.PatternSyntaxException; 11 | 12 | import org.apache.commons.exec.CommandLine; 13 | import org.apache.log4j.Logger; 14 | import org.bitsofinfo.s3.cmd.CmdResult; 15 | import org.bitsofinfo.s3.cmd.CommandExecutor; 16 | import org.bitsofinfo.s3.cmd.TocPathOpResult; 17 | import org.bitsofinfo.s3.worker.WorkerState; 18 | 19 | import com.google.gson.Gson; 20 | 21 | public class FileCopyTOCPayloadHandler 
implements TOCPayloadHandler { 22 | 23 | private static final Logger logger = Logger.getLogger(FileCopyTOCPayloadHandler.class); 24 | 25 | private CommandExecutor executor = null; 26 | private String sourceDirectoryRootPath = null; 27 | private String targetDirectoryRootPath = null; 28 | 29 | private boolean useRsync = true; 30 | private String rsyncOptions = null; 31 | private List rsyncOptionsList = new ArrayList(); 32 | private String rsyncTolerableErrorsRegex = null; 33 | private Pattern rysyncTolerableErrorPattern = null; 34 | 35 | private String chown = null; 36 | private boolean chownDirsOnly = false; 37 | 38 | private String chmod = null; 39 | private boolean chmodDirsOnly = false; 40 | 41 | private int retries = 1; 42 | private long retriesSleepMS = 1000; 43 | 44 | private Gson gson = new Gson(); 45 | 46 | private String postWriteLocalValidateRootDir = null; 47 | private String postWriteLocalValidateLogFile = null; 48 | private Writer postWriteLocalValidateLogFileWriter = null; 49 | private boolean postWriteLocalValidateSkipDirectories = true; 50 | private File file_postWriteLocalValidateLogFile = null; 51 | private TOCPayloadValidator tocPayloadValidator = null; 52 | private long lastValidateLogFileFlushAt = System.currentTimeMillis(); 53 | private long validateLogFileFlushEveryMS = 30000; 54 | 55 | public FileCopyTOCPayloadHandler() { 56 | this.executor = new CommandExecutor(); 57 | } 58 | 59 | public void handlePayload(TOCPayload payload, WorkerState workerState) throws Exception { 60 | 61 | 62 | String sourceFilePath = (sourceDirectoryRootPath + payload.tocInfo.getPath()).replaceAll("//", "/"); 63 | String targetFilePath = (targetDirectoryRootPath + payload.tocInfo.getPath()).replaceAll("//", "/"); 64 | 65 | // we need this for mkdirs.. 
66 | String targetDirPath = null; 67 | if (payload.tocInfo.isDirectory()) { 68 | targetDirPath = targetFilePath; 69 | } else { 70 | // get the parent dir of the file 71 | targetDirPath = targetFilePath.substring(0,targetFilePath.lastIndexOf('/')); 72 | } 73 | 74 | List commandsRun = new ArrayList(); 75 | 76 | /** 77 | * MKDIR against targetDirPath 78 | */ 79 | // mkdir -p targetDirPath 80 | CommandLine mkdirCmdLine = new CommandLine("mkdir"); 81 | mkdirCmdLine.addArgument("-p"); 82 | mkdirCmdLine.addArgument(targetDirPath,false); 83 | 84 | CmdResult mkdirResult = exec(getRetries(),"mkdir",mkdirCmdLine,targetDirPath); 85 | commandsRun.add(mkdirResult); 86 | if (mkdirResult.getExitCode() > 0) { 87 | workerState.addTocPathWriteFailure( 88 | new TocPathOpResult(payload.mode, false, targetFilePath, mkdirCmdLine.toString(), gson.toJson(mkdirResult))); 89 | 90 | return; // exit 91 | } 92 | 93 | /** 94 | * RSYNC (files only) 95 | */ 96 | if (!payload.tocInfo.isDirectory()) { 97 | 98 | if (this.isUseRsync()) { 99 | // rsync --inplace -avz sourcePath targetPath 100 | CommandLine rsyncCmdLine = new CommandLine("rsync"); 101 | for (String arg : rsyncOptionsList) { 102 | rsyncCmdLine.addArgument(arg); 103 | } 104 | rsyncCmdLine.addArgument(sourceFilePath,false); 105 | rsyncCmdLine.addArgument(targetFilePath,false); 106 | 107 | CmdResult rsyncResult = exec(getRetries(),"rsync",rsyncCmdLine,targetFilePath); 108 | commandsRun.add(rsyncResult); 109 | 110 | if (rsyncResult.getExitCode() > 0) { 111 | 112 | // tolerable? 
113 | if (!this.rsyncErrorIsTolerable(rsyncResult)) { 114 | 115 | workerState.addTocPathWriteFailure( 116 | new TocPathOpResult(payload.mode, false, targetFilePath, rsyncCmdLine.toString(), gson.toJson(rsyncResult))); 117 | 118 | return; // exit 119 | 120 | } else { 121 | // record that we tolerated this error 122 | workerState.addTocPathErrorTolerated( 123 | new TocPathOpResult(payload.mode, true, targetFilePath, 124 | "Error tolerated by regex: " + this.rsyncTolerableErrorsRegex, 125 | gson.toJson(rsyncResult))); 126 | } 127 | } 128 | 129 | 130 | // otherwise just use cp 131 | } else { 132 | 133 | CommandLine cpCmdLine = new CommandLine("cp"); 134 | cpCmdLine.addArgument(sourceFilePath,false); 135 | cpCmdLine.addArgument(targetFilePath,false); 136 | 137 | CmdResult cpResult = exec(getRetries(),"cp",cpCmdLine,targetFilePath); 138 | commandsRun.add(cpResult); 139 | if (cpResult.getExitCode() > 0) { 140 | 141 | workerState.addTocPathWriteFailure( 142 | new TocPathOpResult(payload.mode, false, targetFilePath, cpCmdLine.toString(), gson.toJson(cpResult))); 143 | 144 | return; // exit 145 | } 146 | } 147 | 148 | } 149 | 150 | /******************** 151 | * HANDLE CHOWNS 152 | * AND CHMOD for 153 | * both files and dirs 154 | * why? 
because w/ yas3fs 155 | * "preserve" options do not 156 | * properly carry through 157 | * to s3, it needs to be explicit 158 | *****************/ 159 | 160 | /** 161 | * CHOWN 162 | */ 163 | boolean canChown = true; 164 | if (chownDirsOnly && !payload.tocInfo.isDirectory) { 165 | canChown = false; 166 | } 167 | 168 | CmdResult chownResult = null; 169 | if (chown != null && canChown) { 170 | 171 | // chown -R x:y targetFilePath 172 | CommandLine chownCmdLine = new CommandLine("chown"); 173 | chownCmdLine.addArgument(this.chown); 174 | chownCmdLine.addArgument(targetFilePath,false); 175 | 176 | chownResult = exec(1,"chown",chownCmdLine,targetFilePath); 177 | commandsRun.add(chownResult); 178 | if (chownResult.getExitCode() > 0) { 179 | workerState.addTocPathWriteFailure( 180 | new TocPathOpResult(payload.mode, false, targetFilePath, chownCmdLine.toString(), gson.toJson(chownResult))); 181 | return; // exit 182 | } 183 | } 184 | 185 | 186 | /** 187 | * CHMOD 188 | */ 189 | boolean canChmod = true; 190 | if (chmodDirsOnly && !payload.tocInfo.isDirectory) { 191 | canChmod = false; 192 | } 193 | 194 | CmdResult chmodResult = null; 195 | if (chmod != null && canChmod) { 196 | 197 | // chmod -R XXX targetFilePath 198 | CommandLine chmodCmdLine = new CommandLine("chmod"); 199 | chmodCmdLine.addArgument(this.chmod); 200 | chmodCmdLine.addArgument(targetFilePath,false); 201 | 202 | chmodResult = exec(1,"chmod",chmodCmdLine,targetFilePath); 203 | commandsRun.add(chmodResult); 204 | if (chmodResult.getExitCode() > 0) { 205 | workerState.addTocPathWriteFailure( 206 | new TocPathOpResult(payload.mode, false, targetFilePath, chmodCmdLine.toString(), gson.toJson(chmodResult))); 207 | return; // exit 208 | } 209 | 210 | } 211 | 212 | 213 | /** 214 | * Record success if we got here 215 | */ 216 | 217 | String asJson = gson.toJson(commandsRun.toArray()); 218 | 219 | workerState.addTocPathWritten( 220 | new TocPathOpResult(payload.mode, true, targetFilePath, "mkdir + rsync + ?chown + 
?chmod", asJson)); 221 | 222 | 223 | /** 224 | * Do a post write validate if configured 225 | */ 226 | doPostWriteLocalValidate(payload, workerState); 227 | 228 | } 229 | 230 | private void doPostWriteLocalValidate(TOCPayload payload, WorkerState workerState) { 231 | 232 | // are we setup to do post write local validate?? 233 | if (this.postWriteLocalValidateRootDir != null && 234 | this.postWriteLocalValidateLogFile != null && 235 | this.tocPayloadValidator != null) { 236 | 237 | // SKIP directories? 238 | if (payload.tocInfo.isDirectory && 239 | this.postWriteLocalValidateSkipDirectories) { 240 | return; 241 | } 242 | 243 | // validate it 244 | TocPathOpResult result = this.tocPayloadValidator.validateLocally(payload, this.postWriteLocalValidateRootDir); 245 | 246 | // only log if not successful 247 | if (!result.success) { 248 | 249 | // log it in state 250 | workerState.addTocPathPostWriteLocalValidateFailure(result); 251 | 252 | synchronized(this.file_postWriteLocalValidateLogFile) { 253 | 254 | try { 255 | // init writer 256 | if (this.postWriteLocalValidateLogFileWriter == null) { 257 | this.postWriteLocalValidateLogFileWriter = 258 | new BufferedWriter(new FileWriter(this.file_postWriteLocalValidateLogFile)); 259 | } 260 | 261 | postWriteLocalValidateLogFileWriter.write(gson.toJson(result) + "\n"); 262 | 263 | // need to flush? 
264 | long now = System.currentTimeMillis(); 265 | if ((now - this.lastValidateLogFileFlushAt) > validateLogFileFlushEveryMS) { 266 | postWriteLocalValidateLogFileWriter.flush(); 267 | this.lastValidateLogFileFlushAt = now; 268 | } 269 | 270 | } catch(Exception e) { 271 | logger.error("doPostWriteLocalValidate() unexpected error: " + e.getMessage(),e); 272 | } 273 | } 274 | 275 | } 276 | } 277 | } 278 | 279 | public void destroy() { 280 | if (this.postWriteLocalValidateLogFileWriter != null) { 281 | try { 282 | this.postWriteLocalValidateLogFileWriter.close(); 283 | } catch(Exception ignore) {} 284 | } 285 | } 286 | 287 | 288 | private CmdResult exec(int maxAttempts, 289 | String desc, 290 | CommandLine cmd, 291 | String retryExistancePathToCheck) { 292 | 293 | String cmdStr = null; 294 | CmdResult result = null; 295 | try { 296 | cmdStr = cmd.toString(); 297 | 298 | File retryExistanceCheckFile = new File(retryExistancePathToCheck); 299 | int attempts = 0; 300 | 301 | while((attempts < maxAttempts) && 302 | (result == null || result.getExitCode() > 0 || !retryExistanceCheckFile.exists())) { 303 | 304 | attempts++; 305 | logger.debug("exec() attempt#: "+attempts+ " executing "+desc+": " + cmdStr); 306 | 307 | result = executor.execute(cmd,maxAttempts); 308 | 309 | // if fail, let it breathe 310 | if (result.getExitCode() > 0) { 311 | Thread.currentThread().sleep(getRetriesSleepMS()); 312 | } 313 | } 314 | 315 | } catch(Exception e) { 316 | String msg = "exec() "+desc+" unexpected exception: " +cmdStr + " " + e.getMessage(); 317 | logger.error(msg,e); 318 | result = new CmdResult(5555, null, msg); 319 | } 320 | 321 | return result; 322 | } 323 | 324 | private boolean rsyncErrorIsTolerable(CmdResult result) { 325 | 326 | if (this.rsyncTolerableErrorsRegex == null) { 327 | return false; 328 | } 329 | 330 | String stdError = result.getStdErr(); 331 | if (stdError != null && this.rysyncTolerableErrorPattern.matcher(stdError).matches()) { 332 | 
logger.debug("rsyncErrorIsTolerable? TRUE: " + stdError); 333 | return true; 334 | } 335 | 336 | String stdOut = result.getStdOut(); 337 | if (stdOut != null && this.rysyncTolerableErrorPattern.matcher(stdOut).matches()) { 338 | logger.debug("rsyncErrorIsTolerable? TRUE: " + stdOut); 339 | return true; 340 | } 341 | 342 | return false; 343 | } 344 | 345 | public void setSourceDirectoryRootPath(String sourceDirectoryRootPath) { 346 | this.sourceDirectoryRootPath = sourceDirectoryRootPath; 347 | } 348 | 349 | public void setTargetDirectoryRootPath(String targetDirectoryRootPath) { 350 | this.targetDirectoryRootPath = targetDirectoryRootPath; 351 | } 352 | 353 | public void handlePayload(TOCPayload payload) throws Exception { 354 | throw new UnsupportedOperationException("RSyncInvokingTOCPayloadHandler does not " + 355 | "support this method variant, call me through Worker"); 356 | } 357 | 358 | public void setChown(String chown) { 359 | this.chown = chown; 360 | } 361 | 362 | public void setChmod(String chmod) { 363 | this.chmod = chmod; 364 | } 365 | 366 | public boolean isUseRsync() { 367 | return useRsync; 368 | } 369 | 370 | public void setUseRsync(boolean useRsync) { 371 | this.useRsync = useRsync; 372 | } 373 | 374 | public void setChownDirsOnly(boolean dirsOnly) { 375 | this.chownDirsOnly = dirsOnly; 376 | } 377 | 378 | public void setChmodDirsOnly(boolean dirsOnly) { 379 | this.chmodDirsOnly = dirsOnly; 380 | } 381 | 382 | public String getRsyncOptions() { 383 | return rsyncOptions; 384 | } 385 | 386 | public void setRsyncOptions(String rsyncOptions) { 387 | this.rsyncOptions = rsyncOptions; 388 | for (String option : rsyncOptions.split(" ")) { 389 | this.rsyncOptionsList.add(option); 390 | } 391 | } 392 | 393 | public String getRsyncTolerableErrorsRegex() { 394 | return rsyncTolerableErrorsRegex; 395 | } 396 | 397 | public void setRsyncTolerableErrorsRegex(String rsyncTolerableErrorsRegex) throws PatternSyntaxException { 398 | this.rsyncTolerableErrorsRegex 
= rsyncTolerableErrorsRegex; 399 | this.rysyncTolerableErrorPattern = Pattern.compile(this.rsyncTolerableErrorsRegex); 400 | logger.debug("Set rsyncTolerableErrorsRegex="+rsyncTolerableErrorsRegex); 401 | } 402 | 403 | public int getRetries() { 404 | return retries; 405 | } 406 | 407 | public void setRetries(int retries) { 408 | this.retries = retries; 409 | } 410 | 411 | public long getRetriesSleepMS() { 412 | return retriesSleepMS; 413 | } 414 | 415 | public void setRetriesSleepMS(long retriesSleepMS) { 416 | this.retriesSleepMS = retriesSleepMS; 417 | } 418 | 419 | public String getPostWriteLocalValidateRootDir() { 420 | return postWriteLocalValidateRootDir; 421 | } 422 | 423 | public void setPostWriteLocalValidateRootDir(String postWriteLocalValidateRootDir) { 424 | this.postWriteLocalValidateRootDir = postWriteLocalValidateRootDir; 425 | } 426 | 427 | public String getPostWriteLocalValidateLogFile() { 428 | return postWriteLocalValidateLogFile; 429 | } 430 | 431 | public void setPostWriteLocalValidateLogFile( 432 | String postWriteLocalValidateLogFile) { 433 | this.postWriteLocalValidateLogFile = postWriteLocalValidateLogFile; 434 | this.file_postWriteLocalValidateLogFile = new File(postWriteLocalValidateLogFile); 435 | } 436 | 437 | public TOCPayloadValidator getTocPayloadValidator() { 438 | return tocPayloadValidator; 439 | } 440 | 441 | public void setTocPayloadValidator(TOCPayloadValidator tocPayloadValidator) { 442 | this.tocPayloadValidator = tocPayloadValidator; 443 | } 444 | 445 | public void setPostWriteLocalValidateSkipDirectories(boolean postWriteLocalValidateSkipDirectories) { 446 | this.postWriteLocalValidateSkipDirectories = postWriteLocalValidateSkipDirectories; 447 | } 448 | 449 | 450 | } 451 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/S3BucketObjectLister.java: -------------------------------------------------------------------------------- 1 | package 
org.bitsofinfo.s3.toc; 2 | 3 | import java.util.HashSet; 4 | import java.util.List; 5 | import java.util.Queue; 6 | import java.util.Set; 7 | 8 | import org.apache.log4j.Logger; 9 | 10 | import com.amazonaws.event.ProgressEvent; 11 | import com.amazonaws.event.ProgressListener; 12 | import com.amazonaws.services.s3.AmazonS3Client; 13 | import com.amazonaws.services.s3.model.ListObjectsRequest; 14 | import com.amazonaws.services.s3.model.ObjectListing; 15 | import com.amazonaws.services.s3.model.S3ObjectSummary; 16 | 17 | public class S3BucketObjectLister implements Runnable, SourceTOCGenerator, ProgressListener { 18 | 19 | private static final Logger logger = Logger.getLogger(S3BucketObjectLister.class); 20 | 21 | private AmazonS3Client s3Client = null; 22 | private String s3BucketName = null; 23 | private boolean running = true; 24 | private int tocInfosGenerated = 0; 25 | 26 | 27 | public Set generateTOC(Queue tocQueue) throws Exception { 28 | Thread loggingThread = new Thread(this); 29 | 30 | Set toc = new HashSet(); 31 | loggingThread.start(); 32 | 33 | scanBucket(toc,tocQueue); 34 | 35 | this.running = false; // stop logging 36 | return toc; 37 | } 38 | 39 | private void scanBucket(Set toc, Queue tocQueue) throws Exception { 40 | 41 | ListObjectsRequest listRequest = new ListObjectsRequest(); 42 | listRequest.setBucketName(s3BucketName); 43 | // listRequest.setGeneralProgressListener(this); 44 | listRequest.setMaxKeys(1000); 45 | 46 | String nextMarker = null; 47 | ObjectListing objectListing = null; 48 | 49 | while(true) { 50 | 51 | objectListing = s3Client.listObjects(listRequest); 52 | 53 | List objectSummaries = objectListing.getObjectSummaries(); 54 | 55 | for (S3ObjectSummary objSummary : objectSummaries) { 56 | String key = objSummary.getKey(); 57 | 58 | TocInfo tocInfo = new TocInfo(key, objSummary.getSize()); 59 | 60 | // is it a "dir/" ? 
61 | if (key.lastIndexOf("/") == (key.length() - 1)) { 62 | tocInfo.isDirectory = true; 63 | } else { 64 | tocInfo.isDirectory = false; 65 | } 66 | 67 | toc.add(tocInfo); 68 | tocQueue.add(tocInfo); 69 | tocInfosGenerated++; // increment for logging 70 | 71 | } 72 | 73 | // for pagination 74 | nextMarker = objectListing.getNextMarker(); 75 | if (nextMarker == null) { 76 | break; 77 | } else { 78 | listRequest.setMarker(nextMarker); 79 | logger.debug("scanBucket() nextMarker we will request listing for => " + nextMarker); 80 | } 81 | } 82 | 83 | } 84 | 85 | 86 | 87 | @Override 88 | public void progressChanged(ProgressEvent progressEvent) { 89 | logger.debug("progressChanged() " +progressEvent.getEventType() + 90 | " bytes:" + progressEvent.getBytes() + 91 | " bytesTransferred: " + progressEvent.getBytesTransferred()); 92 | } 93 | 94 | public void run() { 95 | while (running) { 96 | try { 97 | logger.info("\nGenerated TOC current size: " + tocInfosGenerated + "\n"); 98 | Thread.currentThread().sleep(15000); 99 | 100 | } catch(Exception ignore){} 101 | } 102 | } 103 | 104 | public AmazonS3Client getS3Client() { 105 | return s3Client; 106 | } 107 | 108 | public void setS3Client(AmazonS3Client s3Client) { 109 | this.s3Client = s3Client; 110 | } 111 | 112 | public String getS3BucketName() { 113 | return s3BucketName; 114 | } 115 | 116 | public void setS3BucketName(String s3BucketName) { 117 | this.s3BucketName = s3BucketName; 118 | } 119 | 120 | 121 | } 122 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/S3KeyCopyingTOCPayloadHandler.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | import org.apache.log4j.Logger; 4 | import org.bitsofinfo.s3.cmd.TocPathOpResult; 5 | import org.bitsofinfo.s3.worker.WorkerState; 6 | 7 | import com.amazonaws.event.ProgressEvent; 8 | import com.amazonaws.event.ProgressListener; 9 | import 
com.amazonaws.services.s3.AmazonS3Client; 10 | import com.amazonaws.services.s3.model.CopyObjectRequest; 11 | import com.amazonaws.services.s3.model.CopyObjectResult; 12 | import com.amazonaws.services.s3.model.StorageClass; 13 | 14 | public class S3KeyCopyingTOCPayloadHandler implements ProgressListener, TOCPayloadHandler { 15 | 16 | private static final Logger logger = Logger.getLogger(S3KeyCopyingTOCPayloadHandler.class); 17 | 18 | private String sourceS3BucketName = null; 19 | private String targetS3BucketName = null; 20 | private StorageClass storageClass = null; 21 | private boolean enableServerSideEncryption = false; 22 | private AmazonS3Client s3Client = null; 23 | 24 | @Override 25 | public void destroy() { 26 | // nothing to do 27 | } 28 | 29 | @Override 30 | public void progressChanged(ProgressEvent progressEvent) { 31 | logger.debug("progressChanged() " +progressEvent.getEventType() + 32 | " bytes:" + progressEvent.getBytes() + 33 | " bytesTransferred: " + progressEvent.getBytesTransferred()); 34 | } 35 | 36 | 37 | @Override 38 | public void handlePayload(TOCPayload payload) throws Exception { 39 | throw new UnsupportedOperationException("S3KeyCopyingTOCPayloadHandler does not " + 40 | "support this method variant, call me through Worker"); 41 | } 42 | 43 | @Override 44 | public void handlePayload(TOCPayload payload, WorkerState workerState) throws Exception { 45 | 46 | TocInfo tocInfo = payload.tocInfo; 47 | 48 | String logPrefix = "handlePayload() KeyCopy s3://" + this.sourceS3BucketName + "/" + tocInfo.path + 49 | " => s3://" + this.targetS3BucketName +"/"+ tocInfo.path; 50 | 51 | try { 52 | 53 | CopyObjectRequest copyRequest = new CopyObjectRequest(this.sourceS3BucketName, 54 | tocInfo.path, 55 | this.targetS3BucketName, 56 | tocInfo.path); 57 | copyRequest.setStorageClass(storageClass); 58 | // copyRequest.setGeneralProgressListener(this); 59 | 60 | if (this.enableServerSideEncryption) { 61 | 
copyRequest.putCustomRequestHeader("x-amz-server-side-encryption", "AES256"); 62 | } 63 | 64 | CopyObjectResult copyResult = s3Client.copyObject(copyRequest); 65 | 66 | logger.debug(logPrefix + " copied OK"); 67 | workerState.addTocPathWritten(new TocPathOpResult(payload.mode, true, tocInfo.path, "s3.copyKey", "OK")); 68 | 69 | } catch(Exception e) { 70 | logger.error(logPrefix + " unexpected ERROR: " + e.getMessage(),e); 71 | workerState.addTocPathWriteFailure( 72 | new TocPathOpResult(payload.mode, false, tocInfo.path, "s3.copyKey", logPrefix + " " + e.getMessage())); 73 | } 74 | 75 | } 76 | 77 | public String getSourceS3BucketName() { 78 | return sourceS3BucketName; 79 | } 80 | 81 | public void setSourceS3BucketName(String sourceS3BucketName) { 82 | this.sourceS3BucketName = sourceS3BucketName; 83 | } 84 | 85 | public String getTargetS3BucketName() { 86 | return targetS3BucketName; 87 | } 88 | 89 | public void setTargetS3BucketName(String targetS3BucketName) { 90 | this.targetS3BucketName = targetS3BucketName; 91 | } 92 | 93 | public StorageClass getStorageClass() { 94 | return storageClass; 95 | } 96 | 97 | public void setStorageClass(StorageClass storageClass) { 98 | this.storageClass = storageClass; 99 | } 100 | 101 | public boolean isEnableServerSideEncryption() { 102 | return enableServerSideEncryption; 103 | } 104 | 105 | public void setEnableServerSideEncryption(boolean enableServerSideEncryption) { 106 | this.enableServerSideEncryption = enableServerSideEncryption; 107 | } 108 | 109 | public AmazonS3Client getS3Client() { 110 | return s3Client; 111 | } 112 | 113 | public void setS3Client(AmazonS3Client s3Client) { 114 | this.s3Client = s3Client; 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/SourceTOCGenerator.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | import 
java.util.Queue; 4 | import java.util.Set; 5 | 6 | public interface SourceTOCGenerator { 7 | 8 | /** 9 | * Generate the TOC list. Implementor should write live/realtime 10 | * to the passed tocQueue's as well as return a static Set when complete. 11 | * 12 | * @param tocQueue 13 | * @param largeFileMinSize 14 | * @param largeFileTocQueue 15 | * @return 16 | * @throws Exception 17 | */ 18 | public Set generateTOC(Queue tocQueue) throws Exception; 19 | 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/TOCManifestBasedGenerator.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.File; 5 | import java.io.FileReader; 6 | import java.util.HashSet; 7 | import java.util.Queue; 8 | import java.util.Set; 9 | 10 | import org.apache.log4j.Logger; 11 | 12 | /** 13 | * Reads a TOC file that lists paths relative from root 14 | * dir and uses that to create TOCInfo objects (after determining the filesize on disk) 15 | * 16 | */ 17 | public class TOCManifestBasedGenerator implements SourceTOCGenerator, Runnable { 18 | 19 | private static final Logger logger = Logger.getLogger(DirectoryCrawler.class); 20 | 21 | private File rootDir = null; 22 | private File manifestFile = null; 23 | private boolean running = true; 24 | private int tocInfosGenerated = 0; 25 | 26 | public TOCManifestBasedGenerator() {} 27 | 28 | public TOCManifestBasedGenerator(File rootDir) { 29 | this.rootDir = rootDir; 30 | } 31 | 32 | public void setRootDir(File rootDir) { 33 | this.rootDir = rootDir; 34 | if (!rootDir.exists()) { 35 | throw new RuntimeException("TOCManifestBasedGenerator invalid rootDir: " + rootDir.getAbsolutePath()); 36 | } 37 | } 38 | 39 | public void setManifestFile(File manifestFile) { 40 | this.manifestFile = manifestFile; 41 | if (!manifestFile.exists()) { 42 | throw new 
RuntimeException("TOCManifestBasedGenerator invalid manifestFile: " + manifestFile.getAbsolutePath()); 43 | } 44 | } 45 | 46 | public Set generateTOC(Queue tocQueue) throws Exception { 47 | Thread loggingThread = new Thread(this); 48 | 49 | Set toc = new HashSet(); 50 | loggingThread.start(); 51 | 52 | BufferedReader reader = new BufferedReader(new FileReader(manifestFile)); 53 | String line = null; 54 | while ((line = reader.readLine()) != null) { 55 | 56 | if (line.trim().isEmpty()) { 57 | continue; 58 | } 59 | 60 | File tocEntry = new File(rootDir.getAbsolutePath() + line.trim()); 61 | 62 | if (tocEntry.exists()) { 63 | 64 | String adjustedPath = tocEntry.getAbsolutePath().replace(this.rootDir.getAbsolutePath(), ""); 65 | TocInfo finfo = new TocInfo(adjustedPath, (tocEntry.isFile() ? tocEntry.length() : 0)); 66 | finfo.setIsDirectory(tocEntry.isDirectory()); 67 | toc.add(finfo); 68 | tocQueue.add(finfo); 69 | tocInfosGenerated++; // increment for logging 70 | 71 | } else { 72 | logger.warn("generateTOC() file referenced in manifest file: " + tocEntry.getAbsolutePath() + " does not exist!"); 73 | } 74 | } 75 | 76 | reader.close(); 77 | 78 | this.running = false; // stop logging 79 | return toc; 80 | } 81 | 82 | public void run() { 83 | while (running) { 84 | try { 85 | logger.info("Generated TOC current size: " + tocInfosGenerated); 86 | Thread.currentThread().sleep(30000); 87 | 88 | } catch(Exception ignore){} 89 | } 90 | } 91 | 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/TOCPayload.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | public class TOCPayload { 4 | 5 | public static enum MODE {WRITE, VALIDATE} 6 | 7 | public MODE mode = null; 8 | public TocInfo tocInfo = null; 9 | 10 | } 11 | -------------------------------------------------------------------------------- 
/src/main/java/org/bitsofinfo/s3/toc/TOCPayloadHandler.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | import org.bitsofinfo.s3.worker.WorkerState; 4 | 5 | public interface TOCPayloadHandler { 6 | 7 | public void destroy(); 8 | public void handlePayload(TOCPayload payload) throws Exception; 9 | public void handlePayload(TOCPayload payload, WorkerState workerState) throws Exception; 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/TOCPayloadValidator.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | import java.io.File; 4 | 5 | import org.apache.log4j.Logger; 6 | import org.bitsofinfo.s3.cmd.TocPathOpResult; 7 | 8 | import com.amazonaws.services.s3.AmazonS3Client; 9 | import com.amazonaws.services.s3.model.AmazonS3Exception; 10 | import com.amazonaws.services.s3.model.ObjectMetadata; 11 | 12 | public class TOCPayloadValidator { 13 | 14 | private static final Logger logger = Logger.getLogger(TOCPayloadValidator.class); 15 | 16 | private AmazonS3Client s3Client = null; 17 | private String s3BucketName = null; 18 | 19 | private static String toc2Key(String tocPath, boolean isDir) { 20 | 21 | String key = tocPath; 22 | 23 | // strip leading / 24 | if (key.startsWith("/")) { 25 | key = key.substring(1,key.length()); 26 | } 27 | 28 | if (isDir) { 29 | if (!key.endsWith("/")) { 30 | key += "/"; 31 | } 32 | } 33 | 34 | return key; 35 | } 36 | 37 | /** 38 | * Validates the file locally on disk 39 | * 40 | * @param payload 41 | * @return 42 | */ 43 | public TocPathOpResult validateLocally(TOCPayload payload, String rootDirPath) { 44 | 45 | String targetPath = null; 46 | 47 | try { 48 | targetPath = (rootDirPath + payload.tocInfo.getPath()).replaceAll("//", "/"); 49 | 50 | logger.debug("validateLocally() " + targetPath); 51 | 52 | 
File toCheck = new File(targetPath); 53 | 54 | boolean exists = false; 55 | 56 | // does it exist? 57 | int attempts = 0; 58 | int maxAttempts = 5; 59 | while(attempts < maxAttempts) { 60 | attempts++; 61 | 62 | if (toCheck.exists()) { 63 | exists = true; 64 | break; 65 | } 66 | 67 | Thread.currentThread().sleep(2000); 68 | } 69 | 70 | if (!exists) { 71 | logger.error("validateLocally() File validation failed, path does not exist! " + targetPath); 72 | return new TocPathOpResult(payload.mode, false, targetPath, "local.check.exists", "!exists"); 73 | } 74 | 75 | 76 | // TOC says directory but local does not?? error 77 | if (payload.tocInfo.isDirectory && !toCheck.isDirectory()) { 78 | logger.error("validateLocally() Path validation failed, TOC states path should be directory, local fs does not! " + targetPath);; 79 | return new TocPathOpResult(payload.mode, false, targetPath, "local.check.TOCDir_isa_LocalDir", "false"); 80 | } 81 | 82 | // otherwise... must be a file, check size 83 | if (toCheck.isFile()) { 84 | 85 | if (toCheck.length() != payload.tocInfo.getSize()) { 86 | 87 | logger.error("validateLocally() File validation failed, file size does not match! " + 88 | "" + targetPath + " expected:" + payload.tocInfo.size + " actual:" + toCheck.length());; 89 | 90 | return new TocPathOpResult(payload.mode, false, targetPath, 91 | "local.check.file_size", "expected:"+ payload.tocInfo.size + " actual:"+toCheck.length()); 92 | } 93 | } 94 | 95 | // SUCCESS! 
if we got here we are OK 96 | return new TocPathOpResult(payload.mode, true, targetPath, "local.check.exists", "ok"); 97 | 98 | 99 | } catch(Exception e) { 100 | logger.error("validateLocally() Unexpected exception: " + e.getMessage(),e); 101 | return new TocPathOpResult(payload.mode, false, targetPath, "local.check.error", "exception: " + e.getMessage()); 102 | 103 | } 104 | } 105 | 106 | public TocPathOpResult validateOnS3(TOCPayload payload) { 107 | 108 | if (s3Client == null || s3BucketName == null) { 109 | throw new RuntimeException("Cannot validateOnS3(), TOCPayloadValidator is not configured w/ s3Client or bucket name"); 110 | } 111 | 112 | try { 113 | String keyToCheck = toc2Key(payload.tocInfo.getPath(),payload.tocInfo.isDirectory); 114 | logger.debug("validateOnS3() " + keyToCheck); 115 | 116 | ObjectMetadata md = s3Client.getObjectMetadata(getS3BucketName(), keyToCheck); 117 | 118 | // size not match! 119 | if (payload.tocInfo.size != md.getContentLength()) { 120 | 121 | logger.error("validateOnS3() S3 object length does not match! 
" + 122 | "" + keyToCheck + " expected:" + payload.tocInfo.size + " actual:" + md.getContentLength());; 123 | 124 | return new TocPathOpResult(payload.mode, false, payload.tocInfo.getPath(), 125 | "s3.check.content.length", "expected:"+ payload.tocInfo.size + " actual:"+md.getContentLength()); 126 | 127 | } 128 | 129 | // SUCCESS (no 404 so size matches and it exists) 130 | return new TocPathOpResult(payload.mode, true, payload.tocInfo.getPath(), "s3.check", "ok"); 131 | 132 | } catch(AmazonS3Exception e) { 133 | 134 | // 404 135 | if (e.getStatusCode() == 404) { 136 | 137 | logger.error("validateOnS3() " + payload.tocInfo.getPath() + " s3check returned 404"); 138 | 139 | return new TocPathOpResult(payload.mode, false, payload.tocInfo.getPath(), 140 | "s3.check.404", "key not found 404 at " + this.getS3BucketName()); 141 | 142 | // other error 143 | } else { 144 | 145 | logger.error("validateOnS3() " + payload.tocInfo.getPath() + " unexpected error: " + e.getMessage(),e); 146 | 147 | return new TocPathOpResult(payload.mode, false, payload.tocInfo.getPath(), 148 | "s3.check.error", "error getting object metadata: " + e.getMessage()); 149 | } 150 | } 151 | 152 | } 153 | 154 | public AmazonS3Client getS3Client() { 155 | return s3Client; 156 | } 157 | 158 | public void setS3Client(AmazonS3Client s3Client) { 159 | this.s3Client = s3Client; 160 | } 161 | 162 | public String getS3BucketName() { 163 | return s3BucketName; 164 | } 165 | 166 | public void setS3BucketName(String s3BucketName) { 167 | this.s3BucketName = s3BucketName; 168 | } 169 | 170 | } 171 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/TOCQueue.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.toc; 2 | 3 | import java.net.InetAddress; 4 | import java.util.ArrayList; 5 | import java.util.Collection; 6 | import java.util.List; 7 | import java.util.Random; 8 | import 
java.util.UUID; 9 | 10 | import org.apache.log4j.Logger; 11 | import org.bitsofinfo.s3.toc.TOCPayload.MODE; 12 | 13 | import com.amazonaws.AmazonClientException; 14 | import com.amazonaws.auth.BasicAWSCredentials; 15 | import com.amazonaws.services.sqs.AmazonSQSClient; 16 | import com.amazonaws.services.sqs.model.CreateQueueResult; 17 | import com.amazonaws.services.sqs.model.DeleteMessageBatchRequest; 18 | import com.amazonaws.services.sqs.model.DeleteMessageBatchRequestEntry; 19 | import com.amazonaws.services.sqs.model.ListQueuesResult; 20 | import com.amazonaws.services.sqs.model.Message; 21 | import com.amazonaws.services.sqs.model.ReceiveMessageRequest; 22 | import com.amazonaws.services.sqs.model.ReceiveMessageResult; 23 | import com.google.gson.Gson; 24 | 25 | public class TOCQueue implements Runnable { 26 | 27 | private static final Logger logger = Logger.getLogger(TOCQueue.class); 28 | 29 | private AmazonSQSClient sqsClient = null; 30 | private String tocQueueUrl = null; 31 | 32 | private Thread consumerThread = null; 33 | 34 | private Gson gson = new Gson(); 35 | 36 | private TOCPayloadHandler tocPayloadHandler = null; 37 | private String mySourceIdentifier = null; 38 | private boolean canDestroyQueue = false; 39 | private String sqsQueueName = null; 40 | private String myId = UUID.randomUUID().toString().replaceAll("-", "").substring(0,6); 41 | private boolean running = true; 42 | private boolean paused = false; 43 | 44 | private long lastSQSMessageReceivedMS = -1; 45 | private int totalMessagesProcessed = 0; 46 | private int totalMessageRequestsMade = 0; 47 | private boolean currentlyProcessingMessage = false; 48 | 49 | public TOCQueue(boolean isConsumer, String awsAccessKey, String awsSecretKey, String sqsQueueName, TOCPayloadHandler tocPayloadHandler) throws Exception { 50 | super(); 51 | 52 | mySourceIdentifier = determineHostName() + "-" + UUID.randomUUID().toString().replace("-", "").substring(0,4); 53 | this.sqsQueueName = sqsQueueName; 54 | 
this.tocPayloadHandler = tocPayloadHandler; 55 | 56 | if (!isConsumer) { // then I am the master... 57 | canDestroyQueue = true; 58 | this.sqsQueueName += "-" + mySourceIdentifier; 59 | } 60 | 61 | 62 | sqsClient = new AmazonSQSClient(new BasicAWSCredentials(awsAccessKey, awsSecretKey)); 63 | 64 | 65 | connectToQueue(isConsumer, 1000); 66 | 67 | if (isConsumer) { 68 | this.consumerThread = new Thread(this,"TOCQueue["+myId+"] msg consumer thread"); 69 | } 70 | 71 | logger.info("\n-------------------------------------------\n" + 72 | "TOC Queue["+myId+"]: ALL SQS resources hooked up OK: "+this.tocQueueUrl+"\n" + 73 | "-------------------------------------------\n"); 74 | } 75 | 76 | public Long getLastMsgReceivedTimeMS() { 77 | return this.lastSQSMessageReceivedMS; 78 | } 79 | 80 | public void start() { 81 | this.consumerThread.start(); 82 | this.paused = false; 83 | } 84 | 85 | public void pauseConsuming() { 86 | this.paused = true; 87 | } 88 | 89 | public void resumeConsuming() { 90 | this.lastSQSMessageReceivedMS = System.currentTimeMillis(); // set to now. 
91 | this.totalMessageRequestsMade = 0; // reset to zero 92 | this.paused = false; 93 | } 94 | 95 | public boolean isPaused() { 96 | return this.paused; 97 | } 98 | 99 | /** 100 | * Note here we attempt to the TOCQueue which may take some time to be shown as available 101 | * @param isConsumer 102 | * @param maxAttempts 103 | * @throws Exception 104 | */ 105 | public void connectToQueue(boolean isConsumer, int maxAttempts) throws Exception{ 106 | 107 | for (int i=0; i messages = msgResult.getMessages(); 162 | 163 | DeleteMessageBatchRequest deleteRequest = new DeleteMessageBatchRequest(); 164 | deleteRequest.setQueueUrl(tocQueueUrl); 165 | Collection entries = new ArrayList(); 166 | for (Message msg : messages) { 167 | entries.add(new DeleteMessageBatchRequestEntry() 168 | .withId(msg.getReceiptHandle()) 169 | .withReceiptHandle(msg.getReceiptHandle())); 170 | } 171 | 172 | if (entries.size() > 0) { 173 | // delete batch 174 | deleteRequest.setEntries(entries); 175 | sqsClient.deleteMessageBatch(deleteRequest); 176 | } 177 | 178 | // if ok inc, total removed 179 | totalRemoved += entries.size(); 180 | 181 | logger.trace("emptyTOCQueue() purging completed!"); 182 | return totalRemoved; 183 | 184 | } catch(AmazonClientException e) { 185 | logger.error("Error in emptyTOCQueue() (aws error): " + e.getMessage()); 186 | 187 | } catch(Exception e) { 188 | logger.error("Error in emptyTOCQueue(): " + e.getMessage(),e); 189 | } 190 | 191 | return 0; 192 | } 193 | 194 | public void run() { 195 | 196 | Random rand = new Random(); 197 | 198 | while(this.running) { 199 | 200 | if (!this.paused) { 201 | try { 202 | this.totalMessageRequestsMade++; 203 | 204 | ReceiveMessageRequest req = new ReceiveMessageRequest(); 205 | req.setWaitTimeSeconds(10); 206 | req.setQueueUrl(this.tocQueueUrl); 207 | 208 | // 30 minutes it will be invisible to other consumers 209 | // this should be enought time for the tocPayloadHandler to 210 | // complete and then we delete the message 211 | 
req.setVisibilityTimeout(600*3); 212 | req.setMaxNumberOfMessages(1); // only one at a time.. 213 | 214 | ReceiveMessageResult msgResult = sqsClient.receiveMessage(req); 215 | List messages = msgResult.getMessages(); 216 | 217 | for (Message msg : messages) { 218 | 219 | this.currentlyProcessingMessage = true; 220 | this.lastSQSMessageReceivedMS = System.currentTimeMillis(); 221 | this.totalMessagesProcessed++; 222 | 223 | logger.debug("TOCQueue["+myId+"] Received SQS Message " + 224 | "body (json -> TOCPayload) body= " + msg.getBody()); 225 | TOCPayload payload = null; 226 | 227 | try { 228 | payload = gson.fromJson(msg.getBody(), TOCPayload.class); 229 | 230 | } catch(Exception e) { 231 | logger.error("TOCQueue["+myId+"] ERROR: unexpected error converting SQS Message " + 232 | "body (json -> TOCPayload) body= " + msg.getBody()+ " error="+e.getMessage()); 233 | 234 | // delete the message we just analyzed 235 | sqsClient.deleteMessage(tocQueueUrl, msg.getReceiptHandle()); 236 | 237 | continue; 238 | } 239 | 240 | logger.debug("TOCQueue["+myId+"] TOCPayload received: filePath:" + payload.tocInfo.getPath()); 241 | 242 | // handle 243 | this.tocPayloadHandler.handlePayload(payload); 244 | 245 | // delete the message, got here no exception 246 | sqsClient.deleteMessage(tocQueueUrl, msg.getReceiptHandle()); 247 | 248 | // set to false, we are done processing message 249 | this.currentlyProcessingMessage = false; 250 | 251 | } 252 | 253 | } catch(Exception e) { 254 | logger.error("TOCQueue["+myId+"] run() unexpected error in handling TOCPayload: " + e.getMessage(),e); 255 | 256 | // set to false, we are done processing message 257 | this.currentlyProcessingMessage = false; 258 | } 259 | } 260 | 261 | try { 262 | Thread.currentThread().sleep(rand.nextInt(1000)); 263 | } catch(Exception ignore) {} 264 | 265 | } 266 | } 267 | 268 | public void stopConsuming() { 269 | this.running = false; 270 | } 271 | 272 | public void destroy() throws Exception { 273 | 274 | 
Thread.currentThread().sleep(30000); 275 | 276 | try { 277 | if (canDestroyQueue) { 278 | logger.debug("TOCQueue["+myId+"] destroy() " + this.tocQueueUrl); 279 | this.sqsClient.deleteQueue(this.tocQueueUrl); 280 | } 281 | } catch(Exception e) { 282 | logger.error("TOCQueue["+myId+"] destroy() error deleting TOCQueue: " + e.getMessage(),e); 283 | } 284 | } 285 | 286 | public AmazonSQSClient getSqsClient() { 287 | return sqsClient; 288 | } 289 | public void setSqsClient(AmazonSQSClient sqsClient) { 290 | this.sqsClient = sqsClient; 291 | } 292 | public String getTocQueueUrl() { 293 | return tocQueueUrl; 294 | } 295 | public void setTocQueueUrl(String tocQueueUrl) { 296 | this.tocQueueUrl = tocQueueUrl; 297 | } 298 | 299 | public void sendMessage(String messageBody) { 300 | this.sqsClient.sendMessage(this.tocQueueUrl, messageBody); 301 | } 302 | 303 | private static String determineHostName() throws Exception { 304 | 305 | InetAddress addr = InetAddress.getLocalHost(); 306 | 307 | // Get IP Address 308 | byte[] ipAddr = addr.getAddress(); 309 | // Get sourceHost 310 | String tmpHost = addr.getHostName(); 311 | 312 | // we only care about the HOST portion, strip everything else 313 | // as some boxes report a fully qualified sourceHost such as 314 | // host.domainname.com 315 | 316 | int firstDot = tmpHost.indexOf('.'); 317 | if (firstDot != -1) { 318 | tmpHost = tmpHost.substring(0,firstDot); 319 | } 320 | return tmpHost; 321 | 322 | } 323 | 324 | public int getTotalMessagesProcessed() { 325 | return totalMessagesProcessed; 326 | } 327 | 328 | public boolean isCurrentlyProcessingMessage() { 329 | return currentlyProcessingMessage; 330 | } 331 | 332 | public int getTotalMessageRequestsMade() { 333 | return this.totalMessageRequestsMade; 334 | } 335 | 336 | } 337 | 338 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/toc/TocInfo.java: 
/**
 * Simple value holder for one table-of-contents entry: a path (file path
 * or S3 key), its size in bytes, and whether it represents a directory.
 * Fields are intentionally public for direct access by handlers.
 */
public class TocInfo {

    public String path = null;          // relative path or S3 key of the entry
    public boolean isDirectory = false; // true when the entry is a directory
    public long size = 0;               // size in bytes (0 for directories)

    /**
     * @param filePath path / key of the entry
     * @param size     size in bytes
     */
    public TocInfo(String filePath, long size) {
        this.path = filePath;
        this.size = size;
    }

    public String getPath() {
        return this.path;
    }

    public void setPath(String path) {
        this.path = path;
    }

    public long getSize() {
        return this.size;
    }

    public void setSize(long size) {
        this.size = size;
    }

    public boolean isDirectory() {
        return this.isDirectory;
    }

    public void setIsDirectory(boolean isDirectory) {
        this.isDirectory = isDirectory;
    }

}
29 | try { 30 | 31 | // VALIDATE LOCAL first, then S3 32 | if (validateMode == MODE.validateLocallyThenS3OnFailure) { 33 | 34 | TocPathOpResult localCheck = validator.validateLocally(payload,this.targetDirectoryRootPath); 35 | 36 | if (localCheck.success) { 37 | workerState.addTocPathValidated(localCheck); 38 | 39 | // failed? check s3 40 | } else { 41 | logger.error("validateLocally() failed, falling back to S3 check..." + payload.tocInfo.getPath()); 42 | TocPathOpResult s3Check = validator.validateOnS3(payload); 43 | 44 | if (s3Check.success) { 45 | workerState.addTocPathValidated(s3Check); 46 | 47 | // both failed.... 48 | } else { 49 | workerState.addTocPathValidateFailure(new TocPathOpResult(payload.mode, false, payload.tocInfo.path, 50 | "localFS["+localCheck.success+"]_then_s3["+s3Check.success+"]", "failed: s3["+s3Check.message+"] local["+localCheck.message+"]")); 51 | } 52 | 53 | } 54 | 55 | return; 56 | } 57 | 58 | // VALIDATE BOTH 59 | if (validateMode == MODE.validateEverywhere) { 60 | TocPathOpResult localCheck = validator.validateLocally(payload,this.targetDirectoryRootPath); 61 | TocPathOpResult s3Check = validator.validateOnS3(payload); 62 | 63 | if (localCheck.success && s3Check.success) { 64 | workerState.addTocPathValidated(new TocPathOpResult(payload.mode, true, payload.tocInfo.path, "localFS_and_s3", "both validated ok")); 65 | } else { 66 | workerState.addTocPathValidateFailure(new TocPathOpResult(payload.mode, false, payload.tocInfo.path, 67 | "localFS["+localCheck.success+"]_and_s3["+s3Check.success+"]", "failed: s3["+s3Check.message+"] local["+localCheck.message+"]")); 68 | } 69 | 70 | return; 71 | } 72 | 73 | // VALIDATE LOCAL ONLY 74 | if (validateMode == MODE.validateLocallyOnly) { 75 | TocPathOpResult localCheck = validator.validateLocally(payload,this.targetDirectoryRootPath); 76 | 77 | if (localCheck.success) { 78 | workerState.addTocPathValidated(localCheck); 79 | } else { 80 | workerState.addTocPathValidateFailure(localCheck); 81 | 
} 82 | 83 | return; 84 | } 85 | 86 | 87 | // VALIDATE S3 ONLY 88 | if (validateMode == MODE.validateS3Only) { 89 | TocPathOpResult s3Check = validator.validateOnS3(payload); 90 | 91 | if (s3Check.success) { 92 | workerState.addTocPathValidated(s3Check); 93 | } else { 94 | workerState.addTocPathValidateFailure(s3Check); 95 | } 96 | 97 | return; 98 | } 99 | 100 | 101 | } catch(Exception e) { 102 | 103 | workerState.addTocPathValidateFailure( 104 | new TocPathOpResult(payload.mode, false, payload.tocInfo.path, "validation_error", "exception: " + e.getMessage())); 105 | 106 | logger.error("File validation exception: " + e.getMessage(),e); 107 | } 108 | } 109 | 110 | 111 | public void setTargetDirectoryRootPath(String targetDirectoryRootPath) { 112 | this.targetDirectoryRootPath = targetDirectoryRootPath; 113 | } 114 | 115 | 116 | public void handlePayload(TOCPayload payload) throws Exception { 117 | throw new UnsupportedOperationException("ValidatingTOCPayloadHandler does not " + 118 | "support this method variant, call me through Worker"); 119 | } 120 | 121 | public AmazonS3Client getS3Client() { 122 | return validator.getS3Client(); 123 | } 124 | 125 | public void setS3Client(AmazonS3Client s3Client) { 126 | validator.setS3Client(s3Client); 127 | } 128 | 129 | public String getS3BucketName() { 130 | return validator.getS3BucketName(); 131 | } 132 | 133 | public void setS3BucketName(String s3BucketName) { 134 | validator.setS3BucketName(s3BucketName); 135 | } 136 | 137 | 138 | public MODE getValidateMode() { 139 | return validateMode; 140 | } 141 | 142 | 143 | public void setValidateMode(MODE validateMode) { 144 | this.validateMode = validateMode; 145 | } 146 | 147 | } 148 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/util/CompressUtil.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.util; 2 | 3 | import 
/**
 * Utility for round-tripping UTF-8 text through DEFLATE compression and
 * Base64 encoding (used to keep TOC payloads compact for SNS/SQS transport).
 *
 * <p>Uses the JDK's {@link java.util.Base64} (standard RFC 4648 alphabet,
 * byte-compatible with the previously used com.amazonaws.util.Base64) so the
 * class has no third-party dependency.
 */
public class CompressUtil {

    /**
     * Reverses {@link #compressAndB64EncodeUTF8Bytes(byte[])}: Base64-decodes
     * the input, inflates the DEFLATE stream, and decodes the result as UTF-8.
     *
     * @param b64EncodedCompressedBytes Base64 text (as bytes) wrapping DEFLATE-compressed UTF-8 data
     * @return the original characters
     * @throws Exception if the input is not valid Base64 or a complete DEFLATE stream
     */
    public static char[] decompressAndB64DecodeUTF8Bytes(byte[] b64EncodedCompressedBytes) throws Exception {

        byte[] input = Base64.getDecoder().decode(b64EncodedCompressedBytes);

        Inflater inflater = new Inflater();
        try {
            inflater.setInput(input);

            ByteArrayOutputStream stream = new ByteArrayOutputStream(Math.max(64, input.length * 2));
            byte[] buf = new byte[4096];
            while (!inflater.finished()) {
                int count = inflater.inflate(buf);
                // Guard: a truncated/corrupt stream yields count==0 while still
                // needing input; without this check the loop would spin forever.
                if (count == 0 && !inflater.finished()
                        && (inflater.needsInput() || inflater.needsDictionary())) {
                    throw new DataFormatException("truncated or corrupt DEFLATE stream");
                }
                stream.write(buf, 0, count);
            }
            return new String(stream.toByteArray(), StandardCharsets.UTF_8).toCharArray();
        } finally {
            inflater.end(); // release native zlib memory
        }
    }

    /**
     * DEFLATE-compresses the given bytes at {@link Deflater#BEST_COMPRESSION}
     * and returns the result as a Base64 string.
     *
     * @param bytes raw (typically UTF-8) bytes to compress; may be empty
     * @return Base64 text wrapping the compressed bytes
     * @throws Exception declared for interface compatibility with existing callers
     */
    public static String compressAndB64EncodeUTF8Bytes(byte[] bytes) throws Exception {

        Deflater compressor = new Deflater();
        try {
            compressor.setLevel(Deflater.BEST_COMPRESSION);
            compressor.setInput(bytes);
            compressor.finish();

            // Note: compressed output can exceed the input size for small or
            // incompressible data, so the buffer below is only a size hint.
            ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.max(64, bytes.length));
            byte[] buf = new byte[4096];
            while (!compressor.finished()) {
                int count = compressor.deflate(buf);
                bos.write(buf, 0, count);
            }
            // ByteArrayOutputStream.close() is a documented no-op; the old
            // try/catch around it was dead code and has been removed.
            return Base64.getEncoder().encodeToString(bos.toByteArray());
        } finally {
            compressor.end(); // release native zlib memory
        }
    }
}
ok; 20 | this.failed = failed; 21 | this.errorsTolerated = errorsTolerated; 22 | this.writeMonitorErrors = writeMonitorErrors; 23 | this.total = total; 24 | this.postWriteLocalValidateErrors = postWriteLocalValidateErrors; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/worker/Worker.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.worker; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Arrays; 5 | import java.util.HashMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Properties; 9 | 10 | import org.apache.commons.exec.CommandLine; 11 | import org.apache.commons.exec.DefaultExecutor; 12 | import org.apache.log4j.Logger; 13 | import org.bitsofinfo.s3.S3Util; 14 | import org.bitsofinfo.s3.control.CCMode; 15 | import org.bitsofinfo.s3.control.CCPayload; 16 | import org.bitsofinfo.s3.control.CCPayloadHandler; 17 | import org.bitsofinfo.s3.control.CCPayloadType; 18 | import org.bitsofinfo.s3.control.ControlChannel; 19 | import org.bitsofinfo.s3.master.ShutdownInfo; 20 | import org.bitsofinfo.s3.toc.FileCopyTOCPayloadHandler; 21 | import org.bitsofinfo.s3.toc.S3KeyCopyingTOCPayloadHandler; 22 | import org.bitsofinfo.s3.toc.TOCPayload; 23 | import org.bitsofinfo.s3.toc.TOCPayload.MODE; 24 | import org.bitsofinfo.s3.toc.TOCPayloadHandler; 25 | import org.bitsofinfo.s3.toc.TOCPayloadValidator; 26 | import org.bitsofinfo.s3.toc.TOCQueue; 27 | import org.bitsofinfo.s3.toc.ValidatingTOCPayloadHandler; 28 | import org.bitsofinfo.s3.util.CompressUtil; 29 | import org.bitsofinfo.s3.yas3fs.Yas3fsS3UploadMonitor; 30 | import org.springframework.util.StringUtils; 31 | 32 | import com.amazonaws.auth.BasicAWSCredentials; 33 | import com.amazonaws.services.s3.AmazonS3Client; 34 | import com.amazonaws.services.s3.model.StorageClass; 35 | import com.google.common.base.Splitter; 36 | import 
com.google.gson.Gson; 37 | import com.google.gson.GsonBuilder; 38 | 39 | 40 | public class Worker implements TOCPayloadHandler, CCPayloadHandler, Runnable { 41 | 42 | private static final Logger logger = Logger.getLogger(Worker.class); 43 | 44 | private List tocQueueConsumers = new ArrayList(); 45 | 46 | private ControlChannel controlChannel = null; 47 | private Thread selfMonitorThread = null; 48 | private Map mode2TOCHandlerMap = null; 49 | 50 | // if no messages received on TOCQueue in 60s (60000ms) declare as idle — NOTE(review): comment previously said 5m but the value below is 60000ms; confirm intended timeout 51 | private long declareWorkerIdleAtMinLastMsgReceivedMS = 60000; 52 | 53 | private String sqsQueueName = null; 54 | private Integer totalConsumerThreads = null; 55 | private String awsAccessKey = null; 56 | private String awsSecretKey = null; 57 | private WorkerState myWorkerState = null; 58 | private Gson gson = new Gson(); 59 | 60 | private WriteMonitor writeMonitor = null; 61 | private WriteErrorMonitor writeErrorMonitor = null; 62 | private WriteBackoffMonitor writeBackoffMonitor = null; 63 | 64 | private Properties properties = null; 65 | 66 | private long initializedLastSentAtMS = -1; 67 | 68 | private long sendCurrentSummariesEveryMS = 60000; 69 | private long currentSummaryLastSentAtMS = -1; 70 | 71 | private boolean tocQueueConsumersArePaused = false; 72 | private int consumerThreadMinRequestsBeforeIdle = 60; 73 | 74 | private ShutdownInfo shutdownInfo = null; 75 | private S3Util s3util = null; 76 | private AmazonS3Client s3Client = null; 77 | 78 | 79 | public Worker(Properties props) { 80 | 81 | try { 82 | 83 | this.s3util = new S3Util(); 84 | 85 | this.properties = props; 86 | 87 | String snsControlTopicName = props.getProperty("aws.sns.control.topic.name"); 88 | this.awsAccessKey = props.getProperty("aws.access.key"); 89 | this.awsSecretKey = props.getProperty("aws.secret.key"); 90 | String userAccountPrincipalId = props.getProperty("aws.account.principal.id"); 91 | String userARN = props.getProperty("aws.user.arn"); 92 | 93 | this.sqsQueueName =
props.getProperty("aws.sqs.queue.name"); 94 | 95 | this.totalConsumerThreads = Integer.valueOf(props.getProperty("worker.toc.consumer.threads.num")); 96 | 97 | this.consumerThreadMinRequestsBeforeIdle = Integer.valueOf(props.getProperty("worker.toc.consumer.threads.min.requests.before.idle")); 98 | 99 | this.s3Client = new AmazonS3Client(new BasicAWSCredentials(this.awsAccessKey, this.awsSecretKey)); 100 | 101 | mode2TOCHandlerMap = initTOCPayloadHandlers(props); 102 | 103 | // handle init command 104 | runInitOrDestroyCommand("initialize",props); 105 | 106 | // write monitor (optional) 107 | initWriteMonitor(props); 108 | 109 | // backoff monitor (optional) 110 | initWriteBackoffMonitor(props); 111 | 112 | // error monitor (optional) 113 | initWriteErrorMonitor(props); 114 | 115 | // spawn control channel 116 | controlChannel = new ControlChannel(false, awsAccessKey, awsSecretKey, snsControlTopicName, userAccountPrincipalId, userARN, this); 117 | 118 | // for tracking my info 119 | this.myWorkerState = new WorkerState( 120 | controlChannel.getMySourceIdentifier(), 121 | controlChannel.getMySourceIp()); 122 | 123 | 124 | // let master know we are initialized 125 | myWorkerState.setCurrentMode(CCMode.INITIALIZED); 126 | sendInitializedState(); 127 | 128 | // monitors our state... 
129 | selfMonitorThread = new Thread(this); 130 | selfMonitorThread.start(); 131 | 132 | } catch(Exception e) { 133 | logger.error("Worker() unexpected error: " + e.getMessage(),e); 134 | destroy(); 135 | } 136 | } 137 | 138 | private void sendInitializedState() throws Exception{ 139 | this.initializedLastSentAtMS = System.currentTimeMillis(); 140 | this.controlChannel.send(false, CCPayloadType.WORKER_CURRENT_MODE, CCMode.INITIALIZED); 141 | } 142 | 143 | private void runInitOrDestroyCommand(String mode, Properties props) throws Exception { 144 | // Initialization command and environment vars 145 | String initCmd = props.getProperty("worker."+mode+".cmd"); 146 | String initCmdEnv = props.getProperty("worker."+mode+".cmd.env"); 147 | 148 | if (initCmd != null) { 149 | 150 | Map env = null; 151 | if (initCmdEnv != null) { 152 | env = Splitter.on(",").withKeyValueSeparator("=").split(initCmdEnv); 153 | } 154 | 155 | // execute it! 156 | logger.debug("Running "+mode+" command: " + initCmd); 157 | CommandLine cmdLine = CommandLine.parse(initCmd); 158 | DefaultExecutor executor = new DefaultExecutor(); 159 | executor.execute(cmdLine, env); 160 | 161 | } 162 | } 163 | 164 | private void runPreValidateModeCommands(Properties props) throws Exception { 165 | 166 | // pre-validate command and environment vars 167 | String preValCmd = props.getProperty("worker.pre.validate.cmd"); 168 | String preValCmdEnv = props.getProperty("worker.pre.validate.cmd.env"); 169 | 170 | // note that either of these can reference %worker.initialize.cmd% and/or %worker.initialize.cmd.env% 171 | // so we will replace them if present 172 | preValCmd = StringUtils.replace(preValCmd, "%worker.initialize.cmd%", props.getProperty("worker.initialize.cmd")); 173 | preValCmdEnv = StringUtils.replace(preValCmdEnv, "%worker.initialize.cmd.env%", props.getProperty("worker.initialize.cmd.env")); 174 | 175 | if (preValCmd != null) { 176 | 177 | Map env = null; 178 | if (preValCmdEnv != null) { 179 | env = 
Splitter.on(",").withKeyValueSeparator("=").split(preValCmdEnv); 180 | } 181 | 182 | // preValCmd can have multiple delimited by ; 183 | List cmdsToRun = new ArrayList(); 184 | if (preValCmd.indexOf(";") != -1) { 185 | cmdsToRun.addAll(Arrays.asList(preValCmd.split(";"))); 186 | } 187 | 188 | for (String cmd : cmdsToRun) { 189 | // execute it! 190 | logger.debug("Running pre.validate command: " + cmd); 191 | CommandLine cmdLine = CommandLine.parse(cmd); 192 | DefaultExecutor executor = new DefaultExecutor(); 193 | executor.execute(cmdLine, env); 194 | } 195 | 196 | } 197 | } 198 | 199 | private void initWriteMonitor(Properties props) throws Exception { 200 | String writeMonitorClass = props.getProperty("worker.write.complete.monitor.class"); 201 | if (writeMonitorClass != null) { 202 | logger.debug("Attempting to create WriteMonitor: " + writeMonitorClass); 203 | this.writeMonitor = (WriteMonitor)Class.forName(writeMonitorClass).newInstance(); 204 | 205 | if (writeMonitor instanceof Yas3fsS3UploadMonitor) { 206 | Yas3fsS3UploadMonitor m = (Yas3fsS3UploadMonitor)writeMonitor; 207 | m.setIsIdleWhenNZeroUploads(10); // the monitor must state 10 consecutive cycles of no s3 uploads to be "idle" 208 | m.setCheckEveryMS(Long.valueOf(props.getProperty("worker.write.complete.monitor.yas3fs.checkEveryMS"))); 209 | m.setPathToLogFile(props.getProperty("worker.write.complete.monitor.yas3fs.logFilePath")); 210 | } 211 | } 212 | } 213 | 214 | private void initWriteBackoffMonitor(Properties props) throws Exception { 215 | String writeBackoffMonitorClass = props.getProperty("worker.write.backoff.monitor.class"); 216 | if (writeBackoffMonitorClass != null) { 217 | logger.debug("Attempting to create WriteBackoffMonitor: " + writeBackoffMonitorClass); 218 | this.writeBackoffMonitor = (WriteBackoffMonitor)Class.forName(writeBackoffMonitorClass).newInstance(); 219 | 220 | if (writeBackoffMonitor instanceof Yas3fsS3UploadMonitor) { 221 | Yas3fsS3UploadMonitor m = 
(Yas3fsS3UploadMonitor)writeBackoffMonitor; 222 | m.setBackoffWhenMultipartUploads(Integer.valueOf(props.getProperty("worker.write.backoff.monitor.yas3fs.backoffWhenMultipartUploads"))); 223 | m.setBackoffWhenTotalHTTPSConns(Integer.valueOf(props.getProperty("worker.write.backoff.monitor.yas3fs.backoffWhenTotalHTTPSConns"))); 224 | m.setBackoffWhenTotalS3Uploads(Integer.valueOf(props.getProperty("worker.write.backoff.monitor.yas3fs.backoffWhenTotalS3Uploads"))); 225 | m.setCheckEveryMS(Long.valueOf(props.getProperty("worker.write.backoff.monitor.yas3fs.checkEveryMS"))); 226 | m.setPathToLogFile(props.getProperty("worker.write.backoff.monitor.yas3fs.logFilePath")); 227 | } 228 | } 229 | } 230 | 231 | private void initWriteErrorMonitor(Properties props) throws Exception { 232 | String writeErrorMonitorClass = props.getProperty("worker.write.error.monitor.class"); 233 | if (writeErrorMonitorClass != null) { 234 | logger.debug("Attempting to create WriteErrorMonitor: " + writeErrorMonitorClass); 235 | this.writeErrorMonitor = (WriteErrorMonitor)Class.forName(writeErrorMonitorClass).newInstance(); 236 | 237 | if (writeErrorMonitor instanceof Yas3fsS3UploadMonitor) { 238 | Yas3fsS3UploadMonitor m = (Yas3fsS3UploadMonitor)writeErrorMonitor; 239 | m.setCheckEveryMS(Long.valueOf(props.getProperty("worker.write.error.monitor.yas3fs.checkEveryMS"))); 240 | m.setPathToLogFile(props.getProperty("worker.write.error.monitor.yas3fs.logFilePath")); 241 | } 242 | } 243 | } 244 | 245 | public void startConsuming() { 246 | tocQueueConsumersArePaused = false; 247 | for (TOCQueue consumer : tocQueueConsumers) { 248 | consumer.start(); 249 | } 250 | } 251 | 252 | public void pauseConsuming() { 253 | tocQueueConsumersArePaused = true; 254 | for (TOCQueue consumer : tocQueueConsumers) { 255 | consumer.pauseConsuming(); 256 | } 257 | } 258 | 259 | public void resumeConsuming() { 260 | tocQueueConsumersArePaused = false; 261 | for (TOCQueue consumer : tocQueueConsumers) { 262 | 
consumer.resumeConsuming(); 263 | } 264 | } 265 | 266 | public void destroy() { 267 | 268 | // kill payload handlers 269 | for (TOCPayloadHandler handler : mode2TOCHandlerMap.values()) { 270 | try { handler.destroy(); } catch(Exception ignore){} 271 | } 272 | 273 | // upload logs 274 | try { 275 | this.s3util.uploadToS3(this.s3Client, 276 | this.shutdownInfo.s3LogBucketName, 277 | this.shutdownInfo.s3LogBucketFolderRoot, 278 | this.myWorkerState.getWorkerIP(), 279 | this.shutdownInfo.workerLogFilesToUpload); 280 | } catch(Exception ignore){} 281 | 282 | 283 | try { 284 | writeMonitor.destroy(); 285 | } catch(Exception ignore){} 286 | 287 | try { 288 | writeBackoffMonitor.destroy(); 289 | } catch(Exception ignore){} 290 | 291 | try { 292 | writeErrorMonitor.destroy(); 293 | } catch(Exception ignore){} 294 | 295 | 296 | try { 297 | controlChannel.destroy(); 298 | } catch(Exception ignore){} 299 | 300 | for (TOCQueue consumer : tocQueueConsumers) { 301 | consumer.stopConsuming(); 302 | } 303 | 304 | try { 305 | Thread.currentThread().sleep(30000); 306 | 307 | for (TOCQueue consumer : tocQueueConsumers) { 308 | consumer.destroy(); 309 | } 310 | } catch(Exception ignore){} 311 | 312 | try { 313 | runInitOrDestroyCommand("destroy",this.properties); 314 | } catch(Exception ignore){} 315 | 316 | } 317 | 318 | 319 | public void handlePayload(CCPayload payload) throws Exception { 320 | 321 | // we only care about master payloads and stuff from other than us 322 | if (!payload.fromMaster) { 323 | return; 324 | } 325 | 326 | // ignore messages targeted for someone other than ourself 327 | if (payload.onlyForHostIdOrIP != null && 328 | !payload.onlyForHostIdOrIP.equalsIgnoreCase(this.myWorkerState.getWorkerHostSourceId()) && 329 | !payload.onlyForHostIdOrIP.equalsIgnoreCase(this.myWorkerState.getWorkerIP())) { 330 | return; 331 | } 332 | 333 | logger.info("handlePayload() received CCPayload: fromMaster: " + payload.fromMaster + 334 | " sourceHostId:" + payload.sourceHostId + 
335 | " sourceHostIp:" + payload.sourceHostIP + 336 | " onlyForHost:" + payload.onlyForHostIdOrIP + 337 | " type:" + payload.type + 338 | " value:" + payload.value); 339 | 340 | // the mode that the master reports we should switch to 341 | if (payload.type == CCPayloadType.MASTER_CURRENT_MODE) { 342 | 343 | CCMode masterMode = CCMode.valueOf(payload.value.toString()); 344 | 345 | 346 | // do we need to change our mode? 347 | if (myWorkerState.getCurrentMode() != masterMode) { 348 | 349 | // set it 350 | myWorkerState.setCurrentMode(masterMode); 351 | 352 | // if we are now WRITE/VALIDATE mode ensure we spawn our threads 353 | if (myWorkerState.getCurrentMode() == CCMode.WRITE || myWorkerState.getCurrentMode() == CCMode.VALIDATE) { 354 | 355 | if (this.tocQueueConsumers.size() == 0) { 356 | 357 | logger.debug("CCMode switched to mode "+myWorkerState.getCurrentMode()+ 358 | ": Worker spawing " + totalConsumerThreads + " separate TOCQueue consumer threads..."); 359 | for (int i=0; i initTOCPayloadHandlers(Properties props) throws Exception { 467 | String writeClazz = props.getProperty("tocPayloadHandler.write.class"); 468 | String validateClazz = props.getProperty("tocPayloadHandler.validate.class"); 469 | 470 | Map map = new HashMap(); 471 | map.put(MODE.WRITE, initTOCPayloadHandler(writeClazz,props)); 472 | map.put(MODE.VALIDATE, initTOCPayloadHandler(validateClazz,props)); 473 | 474 | return map; 475 | 476 | } 477 | 478 | private TOCPayloadHandler initTOCPayloadHandler(String className, Properties props) throws Exception { 479 | 480 | TOCPayloadHandler handler = (TOCPayloadHandler)Class.forName(className).newInstance(); 481 | 482 | 483 | /** 484 | * FileCopyTOCPayloadHandler 485 | */ 486 | if (handler instanceof FileCopyTOCPayloadHandler) { 487 | 488 | FileCopyTOCPayloadHandler fcHandler = (FileCopyTOCPayloadHandler)handler; 489 | 490 | fcHandler.setSourceDirectoryRootPath(props.getProperty("tocPayloadHandler.source.dir.root")); 491 | 492 | 
fcHandler.setTargetDirectoryRootPath(props.getProperty("tocPayloadHandler.target.dir.root")); 493 | 494 | fcHandler.setUseRsync(Boolean.valueOf(props.getProperty("tocPayloadHandler.write.use.rsync"))); 495 | 496 | fcHandler.setRetries(Integer.valueOf(props.getProperty("tocPayloadHandler.write.retries"))); 497 | 498 | fcHandler.setRetriesSleepMS(Long.valueOf(props.getProperty("tocPayloadHandler.write.retries.sleep.ms"))); 499 | 500 | 501 | if (props.getProperty("tocPayloadHandler.write.post.success.validate.local.dir") != null) { 502 | 503 | fcHandler.setPostWriteLocalValidateRootDir(((String)props.getProperty("tocPayloadHandler.write.post.success.validate.local.dir"))); 504 | 505 | if (props.getProperty("tocPayloadHandler.write.post.success.validate.logfile") == null) { 506 | throw new Exception("tocPayloadHandler.write.post.success.validate.logfile must be specified if tocPayloadHandler.write.post.success.validate.local.dir is enabled"); 507 | } 508 | 509 | fcHandler.setPostWriteLocalValidateLogFile( 510 | ((String)props.getProperty("tocPayloadHandler.write.post.success.validate.logfile"))); 511 | 512 | fcHandler.setPostWriteLocalValidateSkipDirectories( 513 | (Boolean.valueOf(props.getProperty("tocPayloadHandler.write.post.success.validate.skipDirectories")))); 514 | 515 | // set validator (note its not configured for S3! 
only local checks) 516 | fcHandler.setTocPayloadValidator(new TOCPayloadValidator()); 517 | 518 | } 519 | 520 | if (props.getProperty("tocPayloadHandler.write.rsync.tolerable.error.regex") != null) { 521 | fcHandler.setRsyncTolerableErrorsRegex((String)props.getProperty("tocPayloadHandler.write.rsync.tolerable.error.regex")); 522 | } 523 | 524 | if (fcHandler.isUseRsync()) { 525 | fcHandler.setRsyncOptions(props.getProperty("tocPayloadHandler.write.rsync.options")); 526 | } 527 | 528 | String chmod = props.getProperty("tocPayloadHandler.write.chmod"); 529 | if (chmod != null) { 530 | boolean dirsOnly = Boolean.valueOf(props.getProperty("tocPayloadHandler.write.chmod.dirsOnly")); 531 | fcHandler.setChmod(chmod); 532 | fcHandler.setChmodDirsOnly(dirsOnly); 533 | } 534 | 535 | String chown = props.getProperty("tocPayloadHandler.write.chown"); 536 | if (chown != null) { 537 | boolean dirsOnly = Boolean.valueOf(props.getProperty("tocPayloadHandler.write.chown.dirsOnly")); 538 | fcHandler.setChown(chown); 539 | fcHandler.setChownDirsOnly(dirsOnly); 540 | } 541 | 542 | return handler; 543 | } 544 | 545 | 546 | 547 | /** 548 | * ValidatingTOCPayloadHandler 549 | */ 550 | if (handler instanceof ValidatingTOCPayloadHandler) { 551 | ValidatingTOCPayloadHandler vhandler = (ValidatingTOCPayloadHandler)handler; 552 | 553 | vhandler.setTargetDirectoryRootPath(props.getProperty("tocPayloadHandler.target.dir.root")); 554 | 555 | vhandler.setValidateMode( 556 | org.bitsofinfo.s3.toc.ValidatingTOCPayloadHandler.MODE.valueOf( 557 | props.getProperty("tocPayloadHandler.validate.mode"))); 558 | 559 | vhandler.setS3BucketName(props.getProperty("tocPayloadHandler.validate.s3.bucketName")); 560 | 561 | vhandler.setS3Client(new AmazonS3Client(new BasicAWSCredentials(this.awsAccessKey, this.awsSecretKey))); 562 | 563 | return vhandler; 564 | } 565 | 566 | /** 567 | * S3KeyCopyingTOCPayloadHandler 568 | */ 569 | if (handler instanceof S3KeyCopyingTOCPayloadHandler) { 570 | 571 | 
S3KeyCopyingTOCPayloadHandler fcHandler = (S3KeyCopyingTOCPayloadHandler)handler; 572 | 573 | fcHandler.setS3Client(this.s3Client); 574 | fcHandler.setSourceS3BucketName(props.getProperty("tocPayloadHandler.write.s3keyCopy.sourceS3BucketName").toString()); 575 | fcHandler.setTargetS3BucketName(props.getProperty("tocPayloadHandler.write.s3keyCopy.targetS3BucketName").toString()); 576 | fcHandler.setEnableServerSideEncryption(Boolean.valueOf(props.getProperty("tocPayloadHandler.write.s3keyCopy.enableServerSideEncryption"))); 577 | fcHandler.setStorageClass(StorageClass.valueOf(props.getProperty("tocPayloadHandler.write.s3keyCopy.storageClass"))); 578 | 579 | return fcHandler; 580 | } 581 | 582 | 583 | throw new Exception("initTOCPayloadHandler() invalid tocPayloadHandler.class " + className); 584 | 585 | } 586 | 587 | 588 | private String getResultsSummaryAsJSON(MODE mode) { 589 | 590 | if (mode == MODE.WRITE) { 591 | ResultSummary writeSummary = new ResultSummary(this.tocQueueConsumersArePaused, 592 | myWorkerState.getTotalWritesOK(), 593 | myWorkerState.getTotalWritesFailed(), 594 | myWorkerState.getTotalErrorsTolerated(), 595 | myWorkerState.getTotalWriteMonitorErrors(), 596 | myWorkerState.getTotalPostWriteLocalValidateFailures(), 597 | myWorkerState.getTotalWritesProcessed()); 598 | 599 | return gson.toJson(writeSummary); 600 | 601 | 602 | } else if (mode == MODE.VALIDATE) { 603 | 604 | ResultSummary validateSummary = new ResultSummary(this.tocQueueConsumersArePaused, 605 | myWorkerState.getTotalValidatesOK(), 606 | myWorkerState.getTotalValidatesFailed(), 607 | myWorkerState.getTotalErrorsTolerated(), 608 | myWorkerState.getTotalWriteMonitorErrors(), 609 | myWorkerState.getTotalPostWriteLocalValidateFailures(), 610 | myWorkerState.getTotalValidationsProcessed()); 611 | 612 | return gson.toJson(validateSummary); 613 | } 614 | 615 | throw new RuntimeException("getResultsSummaryAsJSON() called with invalid MODE: " + mode); 616 | } 617 | 618 | public void run() { 
619 | 620 | boolean running = true; 621 | 622 | // Here we monitor our WorkerState 623 | // to determine where we are at 624 | while (running) { 625 | try { 626 | 627 | Thread.currentThread().sleep(20000); 628 | 629 | // if just in Initialized/Idle state do nothing. 630 | if (this.myWorkerState.getCurrentMode() == CCMode.INITIALIZED || 631 | this.myWorkerState.getCurrentMode() == CCMode.IDLE ) { 632 | 633 | // if we are just in INITIALIZED state, and still are after 30s, resend it 634 | // as we have seen worker initializd messages not get delivered... 635 | long now = System.currentTimeMillis(); 636 | if (this.myWorkerState.getCurrentMode() == CCMode.INITIALIZED && 637 | this.initializedLastSentAtMS > 0 && 638 | (now - this.initializedLastSentAtMS > 60000)) { 639 | 640 | logger.debug("Resending INITIALIZED state to master......"); 641 | this.sendInitializedState(); 642 | } 643 | 644 | continue; 645 | } 646 | 647 | /** 648 | * WRITES DONE? 649 | */ 650 | if (this.myWorkerState.getCurrentMode() == CCMode.WRITE) { 651 | 652 | // determine TOCQueue threads who have been idle long enough to proceed to a state change assumption 653 | int threadsThatQualify = getIdleTOCQueueThreads(); 654 | 655 | // ok all threads are created AND idle, send our summary...as we can only assume we are done 656 | if (this.tocQueueConsumers.size() == this.totalConsumerThreads && 657 | threadsThatQualify >= this.tocQueueConsumers.size()) { 658 | 659 | // if the write monitor IS configured and states we CANNOT proceed... 660 | // then just exit/continue... 
661 | if (writeMonitor != null && !writeMonitor.writesAreComplete()) { 662 | continue; 663 | } 664 | 665 | // final of update write error monitor if exists 666 | if (this.writeErrorMonitor != null) { 667 | this.myWorkerState.addWriteMonitorErrors( 668 | this.writeErrorMonitor.getWriteErrors()); 669 | } 670 | 671 | String asJson = getResultsSummaryAsJSON(MODE.WRITE); 672 | 673 | // pause 674 | this.pauseConsuming(); 675 | 676 | // send out our summary 677 | this.controlChannel.send(false, CCPayloadType.WORKER_WRITES_FINISHED_SUMMARY, asJson); 678 | 679 | // state we are IDLE 680 | this.myWorkerState.setCurrentMode(CCMode.IDLE); 681 | this.controlChannel.send(false, CCPayloadType.WORKER_CURRENT_MODE, CCMode.IDLE); 682 | 683 | 684 | // not finished but lets send a CURRENT_SUMMARY, if necessary 685 | } else { 686 | 687 | // update write error monitor if exists 688 | if (this.writeErrorMonitor != null) { 689 | this.myWorkerState.addWriteMonitorErrors( 690 | this.writeErrorMonitor.getWriteErrors()); 691 | } 692 | 693 | 694 | // build/send the summary 695 | long now = System.currentTimeMillis();; 696 | if ((now - this.currentSummaryLastSentAtMS) > this.sendCurrentSummariesEveryMS) { 697 | // send out our summary 698 | this.currentSummaryLastSentAtMS = now; 699 | String asJson = getResultsSummaryAsJSON(MODE.WRITE); 700 | this.controlChannel.send(false, CCPayloadType.WORKER_WRITES_CURRENT_SUMMARY, asJson); 701 | } 702 | 703 | // should the TOCQueue thread consumers backoff? 704 | if (this.writeBackoffMonitor != null) { 705 | 706 | if (this.writeBackoffMonitor.writesShouldBackoff()) { 707 | if (!this.tocQueueConsumersArePaused) { 708 | logger.debug("WriteBackoffMonitor states we should BACKOFF... pausing TOCQueue consumers"); 709 | this.pauseConsuming(); 710 | } 711 | } else { 712 | if (this.tocQueueConsumersArePaused) { 713 | logger.debug("WriteBackoffMonitor states we can RESUME... 
resuming TOCQueue consumers"); 714 | this.resumeConsuming(); 715 | } 716 | } 717 | } 718 | 719 | } 720 | } 721 | 722 | 723 | 724 | /** 725 | * VALIDATES DONE? 726 | */ 727 | if (this.myWorkerState.getCurrentMode() == CCMode.VALIDATE) { 728 | 729 | // determine TOCQueue threads who have been idle long enough to proceed to a state change assumption 730 | int threadsThatQualify = getIdleTOCQueueThreads(); 731 | 732 | // ok all threads are created AND idle, send our summary...as we can only assume we are done 733 | if (this.tocQueueConsumers.size() == this.totalConsumerThreads && 734 | threadsThatQualify >= this.tocQueueConsumers.size()) { 735 | 736 | String asJson = getResultsSummaryAsJSON(MODE.VALIDATE); 737 | 738 | // pause 739 | this.pauseConsuming(); 740 | 741 | // send out our summary 742 | this.controlChannel.send(false, CCPayloadType.WORKER_VALIDATIONS_FINISHED_SUMMARY, asJson); 743 | 744 | // state we are IDLE 745 | this.myWorkerState.setCurrentMode(CCMode.IDLE); 746 | this.controlChannel.send(false, CCPayloadType.WORKER_CURRENT_MODE, CCMode.IDLE); 747 | 748 | 749 | // not finished but lets send a CURRENT_SUMMARY 750 | } else { 751 | long now = System.currentTimeMillis();; 752 | if ((now - this.currentSummaryLastSentAtMS) > this.sendCurrentSummariesEveryMS) { 753 | // send out our summary 754 | this.currentSummaryLastSentAtMS = now; 755 | String asJson = getResultsSummaryAsJSON(MODE.VALIDATE); 756 | this.controlChannel.send(false, CCPayloadType.WORKER_VALIDATIONS_CURRENT_SUMMARY, asJson); 757 | } 758 | } 759 | } 760 | 761 | } catch(Exception e) { 762 | logger.error("run() unexpected error: " + e.getMessage(),e); 763 | } 764 | } 765 | } 766 | 767 | private int getIdleTOCQueueThreads() { 768 | int threadsThatQualify = 0; 769 | for (TOCQueue tocQueue : this.tocQueueConsumers) { 770 | 771 | // not even connected/ready yet, it has not even 772 | // made at least 'consumerThreadMinRequestsBeforeIdle' requests to get messages 773 | if 
(tocQueue.getTotalMessageRequestsMade() < this.consumerThreadMinRequestsBeforeIdle) { 774 | continue; 775 | } 776 | 777 | long lastMsgReceivedAtMS = (System.currentTimeMillis() - tocQueue.getLastMsgReceivedTimeMS()); 778 | 779 | if (!tocQueue.isPaused() && 780 | !tocQueue.isCurrentlyProcessingMessage() && 781 | 782 | ( 783 | // has processed at least one message and has not done another past our allowed idle time 784 | (tocQueue.getTotalMessagesProcessed() > 0 && lastMsgReceivedAtMS >= this.declareWorkerIdleAtMinLastMsgReceivedMS) 785 | || 786 | // or just has never processed a message, this can happen if too many consumer threads 787 | // some won't process any messages 788 | (tocQueue.getTotalMessagesProcessed() == 0) 789 | 790 | )) { 791 | 792 | threadsThatQualify++; 793 | } 794 | } 795 | return threadsThatQualify; 796 | } 797 | 798 | public void handlePayload(TOCPayload payload,WorkerState workerState) throws Exception { 799 | this.handlePayload(payload); 800 | } 801 | } 802 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/worker/WorkerState.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.worker; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashSet; 5 | import java.util.List; 6 | import java.util.Set; 7 | 8 | import org.bitsofinfo.s3.cmd.TocPathOpResult; 9 | import org.bitsofinfo.s3.control.CCMode; 10 | 11 | public class WorkerState { 12 | 13 | private String workerHostSourceId = null; 14 | private String workerIP = null; 15 | private CCMode currentMode = null; 16 | private Set writeMonitorErrors = new HashSet(); 17 | private List tocPathsPostWriteLocalValidateFailures = new ArrayList(); 18 | private List tocPathsErrorsTolerated = new ArrayList(); 19 | private List tocPathsWritten = new ArrayList(); 20 | private List tocPathsValidated =new ArrayList(); 21 | private List tocPathsWriteFailures = new ArrayList(); 22 | 
private List tocPathsValidateFailures = new ArrayList(); 23 | 24 | public WorkerState(String workerHostSourceId, String workerIP) { 25 | super(); 26 | this.workerHostSourceId = workerHostSourceId; 27 | this.workerIP = workerIP; 28 | } 29 | 30 | public String getWorkerHostSourceId() { 31 | return workerHostSourceId; 32 | } 33 | public int getTotalPostWriteLocalValidateFailures() { 34 | return tocPathsPostWriteLocalValidateFailures.size(); 35 | } 36 | public int getTotalWriteMonitorErrors() { 37 | return writeMonitorErrors.size(); 38 | } 39 | public int getTotalWritesOK() { 40 | return tocPathsWritten.size(); 41 | } 42 | public int getTotalValidatesOK() { 43 | return tocPathsValidated.size(); 44 | } 45 | public int getTotalWritesFailed() { 46 | return tocPathsWriteFailures.size(); 47 | } 48 | public int getTotalValidatesFailed() { 49 | return tocPathsValidateFailures.size(); 50 | } 51 | public int getTotalErrorsTolerated() { 52 | return tocPathsErrorsTolerated.size(); 53 | } 54 | 55 | public CCMode getCurrentMode() { 56 | return this.currentMode; 57 | } 58 | 59 | public void setCurrentMode(CCMode mode) { 60 | this.currentMode = mode; 61 | } 62 | 63 | public synchronized void addWriteMonitorErrors(Set errors) { 64 | this.writeMonitorErrors.addAll(errors); 65 | } 66 | 67 | public synchronized void addTocPathErrorTolerated(TocPathOpResult path) { 68 | this.tocPathsErrorsTolerated.add(path); 69 | } 70 | 71 | public synchronized void addTocPathWritten(TocPathOpResult path) { 72 | this.tocPathsWritten.add(path); 73 | } 74 | 75 | public synchronized void addTocPathValidated(TocPathOpResult path) { 76 | this.tocPathsValidated.add(path); 77 | } 78 | 79 | public synchronized void addTocPathPostWriteLocalValidateFailure(TocPathOpResult path) { 80 | this.tocPathsPostWriteLocalValidateFailures.add(path); 81 | } 82 | 83 | public synchronized void addTocPathValidateFailure(TocPathOpResult path) { 84 | this.tocPathsValidateFailures.add(path); 85 | } 86 | 87 | public synchronized 
void addTocPathWriteFailure(TocPathOpResult path) { 88 | this.tocPathsWriteFailures.add(path); 89 | } 90 | 91 | public int getTotalWritesProcessed() { 92 | // note we do not include "tolerated" here because they are part of the OKs 93 | // and the "tolerated" stuff is just supplemental information 94 | return getTotalWritesFailed() + getTotalWritesOK(); 95 | } 96 | 97 | public int getTotalValidationsProcessed() { 98 | return getTotalValidatesFailed() + getTotalValidatesOK(); 99 | } 100 | 101 | public List getTocPathsWriteFailures() { 102 | return tocPathsWriteFailures; 103 | } 104 | 105 | public List getTocPathsErrorsTolerated() { 106 | return tocPathsErrorsTolerated; 107 | } 108 | 109 | public Set getWriteMonitorErrors() { 110 | return writeMonitorErrors; 111 | } 112 | 113 | public void setTocPathsWriteFailures( 114 | List tocPathsWriteFailures) { 115 | this.tocPathsWriteFailures = tocPathsWriteFailures; 116 | } 117 | 118 | public List getTocPathValidateFailures() { 119 | return tocPathsValidateFailures; 120 | } 121 | 122 | public void setTocPathValidateFailures( 123 | List filePathValidateFailures) { 124 | this.tocPathsValidateFailures = filePathValidateFailures; 125 | } 126 | 127 | public String getWorkerIP() { 128 | return workerIP; 129 | } 130 | 131 | public List getTocPathsPostWriteLocalValidateFailures() { 132 | return tocPathsPostWriteLocalValidateFailures; 133 | } 134 | 135 | public void setTocPathsPostWriteLocalValidateFailures( 136 | List tocPathsPostWriteLocalValidateFailures) { 137 | this.tocPathsPostWriteLocalValidateFailures = tocPathsPostWriteLocalValidateFailures; 138 | } 139 | 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/worker/WriteBackoffMonitor.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.worker; 2 | 3 | public interface WriteBackoffMonitor { 4 | 5 | public boolean writesShouldBackoff(); 6 | 
public void destroy(); 7 | public void start(); 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/worker/WriteErrorMonitor.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.worker; 2 | 3 | import java.util.Set; 4 | 5 | public interface WriteErrorMonitor { 6 | 7 | public Set getWriteErrors(); 8 | public void destroy(); 9 | public void start(); 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/worker/WriteMonitor.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.worker; 2 | 3 | public interface WriteMonitor { 4 | 5 | public boolean writesAreComplete(); 6 | public void destroy(); 7 | public void start(); 8 | 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/worker/WriteMonitorError.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.worker; 2 | 3 | import java.util.Date; 4 | 5 | public class WriteMonitorError { 6 | 7 | private Date timestamp = null; 8 | private String msg = null; 9 | 10 | public WriteMonitorError(Date timestamp, String msg) { 11 | this.timestamp = timestamp; 12 | this.msg = msg; 13 | } 14 | 15 | public boolean equals(Object o) { 16 | if (!(o instanceof WriteMonitorError)) { 17 | return false; 18 | } 19 | 20 | WriteMonitorError other = (WriteMonitorError)o; 21 | if (other.getTimestamp().equals(this.timestamp)) { 22 | return true; 23 | } 24 | 25 | return false; 26 | } 27 | 28 | public int hashCode() { 29 | return timestamp.hashCode(); 30 | } 31 | 32 | public Date getTimestamp() { 33 | return timestamp; 34 | } 35 | public void setTimestamp(Date timestamp) { 36 | this.timestamp = timestamp; 37 | } 38 | public String getMsg() { 39 
| return msg; 40 | } 41 | public void setMsg(String msg) { 42 | this.msg = msg; 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/org/bitsofinfo/s3/yas3fs/Yas3fsS3UploadMonitor.java: -------------------------------------------------------------------------------- 1 | package org.bitsofinfo.s3.yas3fs; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.OutputStream; 7 | import java.io.RandomAccessFile; 8 | import java.io.StringWriter; 9 | import java.text.SimpleDateFormat; 10 | import java.util.Date; 11 | import java.util.HashSet; 12 | import java.util.Set; 13 | import java.util.Stack; 14 | import java.util.regex.Matcher; 15 | import java.util.regex.Pattern; 16 | 17 | import org.apache.commons.exec.CommandLine; 18 | import org.apache.commons.exec.DefaultExecutor; 19 | import org.apache.commons.exec.ExecuteStreamHandler; 20 | import org.apache.commons.io.IOUtils; 21 | import org.apache.log4j.Logger; 22 | import org.bitsofinfo.s3.worker.WriteBackoffMonitor; 23 | import org.bitsofinfo.s3.worker.WriteErrorMonitor; 24 | import org.bitsofinfo.s3.worker.WriteMonitor; 25 | import org.bitsofinfo.s3.worker.WriteMonitorError; 26 | 27 | /** 28 | * Monitors the Yas3fs log for entries like this looking for the s3_queue being zero 29 | * meaning that there are no uploads to s3 in progress. It also can act as a WriteBackoffMonitor 30 | * to monitor when the total number in s3_queue gets to high. 
31 | * 32 | * INFO entries, mem_size, disk_size, download_queue, prefetch_queue, s3_queue: 1, 0, 0, 0, 0, 0 33 | * 34 | * Also has the ability to monitor the number of outgoing 443 SSL connections (to S3) 35 | * and can instruct to backoff if these are > whatever the max is configured for 36 | * 37 | * @author bitsofinfo 38 | * 39 | */ 40 | public class Yas3fsS3UploadMonitor implements WriteMonitor, WriteBackoffMonitor, WriteErrorMonitor, Runnable { 41 | 42 | private static final Logger logger = Logger.getLogger(Yas3fsS3UploadMonitor.class); 43 | 44 | private long checkEveryMS = 10000; 45 | private int isIdleWhenNZeroUploads = 0; // count of the total number of s3UploadCounts entries must be ZERO to declare we are idle 46 | 47 | private String pathToLogFile = null; 48 | private boolean running = true; 49 | private Thread monitorThread = null; 50 | private String latestLogTail = null; 51 | 52 | private SimpleDateFormat logDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS"); 53 | 54 | private Integer backoffWhenTotalS3Uploads = 10; 55 | 56 | private Integer backoffWhenTotalHTTPSConns = 10; 57 | private int latestHTTPSConnTotal = 0; 58 | 59 | private Integer backoffWhenMultipartUploads = 2; 60 | 61 | private Stack s3UploadCounts = new Stack(); 62 | 63 | public Yas3fsS3UploadMonitor() { 64 | monitorThread = new Thread(this); 65 | } 66 | 67 | 68 | public Yas3fsS3UploadMonitor(String pathToLogFile, long checkEveryMS) { 69 | this.pathToLogFile = pathToLogFile; 70 | this.checkEveryMS = checkEveryMS; 71 | monitorThread = new Thread(this); 72 | } 73 | 74 | 75 | public Yas3fsS3UploadMonitor(String pathToLogFile, long checkEveryMS, int isIdleWhenNZeroUploads) { 76 | this.pathToLogFile = pathToLogFile; 77 | this.checkEveryMS = checkEveryMS; 78 | this.isIdleWhenNZeroUploads = isIdleWhenNZeroUploads; 79 | monitorThread = new Thread(this); 80 | } 81 | 82 | 83 | public Yas3fsS3UploadMonitor(String pathToLogFile, int backoffWhenTotalS3Uploads, long checkEveryMS) { 84 |
this.pathToLogFile = pathToLogFile; 85 | this.checkEveryMS = checkEveryMS; 86 | this.backoffWhenTotalS3Uploads = backoffWhenTotalS3Uploads; 87 | monitorThread = new Thread(this); 88 | } 89 | 90 | public void start() { 91 | monitorThread.start(); 92 | } 93 | 94 | public void destroy() { 95 | this.running = false; 96 | } 97 | 98 | public void run() { 99 | while(running) { 100 | try { 101 | 102 | Thread.currentThread().sleep(this.checkEveryMS); 103 | 104 | 105 | try { 106 | /** 107 | * Check the log file 108 | */ 109 | RandomAccessFile file = new RandomAccessFile(new File(pathToLogFile), "r"); 110 | byte[] buffer = new byte[32768]; // read ~32k 111 | if (file.length() >= buffer.length) { 112 | file.seek(file.length()-buffer.length); 113 | } 114 | file.read(buffer, 0, buffer.length); 115 | file.close(); 116 | 117 | this.latestLogTail = new String(buffer,"UTF-8"); 118 | } catch(Exception e) { 119 | logger.error("Unexpected error tailing yas3fs.log: " + this.pathToLogFile + " " + e.getMessage(),e); 120 | } 121 | 122 | try { 123 | /** 124 | * Check netstat 125 | */ 126 | 127 | CommandLine cmdLine = new CommandLine("netstat"); 128 | cmdLine.addArgument("-na"); 129 | 130 | final StringWriter stdOut = new StringWriter(); 131 | final StringWriter stdErr = new StringWriter(); 132 | DefaultExecutor executor = new DefaultExecutor(); 133 | executor.setStreamHandler(new ExecuteStreamHandler() { 134 | public void setProcessOutputStream(InputStream is) throws IOException {IOUtils.copy(is, stdOut, "UTF-8");} 135 | public void setProcessErrorStream(InputStream is) throws IOException {IOUtils.copy(is, stdErr, "UTF-8");} 136 | public void stop() throws IOException {} 137 | public void start() throws IOException {} 138 | public void setProcessInputStream(OutputStream os) throws IOException {} 139 | }); 140 | 141 | logger.trace("Executing: " + cmdLine.toString()); 142 | 143 | int exitValue = executor.execute(cmdLine); 144 | if (exitValue > 0) { 145 | logger.error("Netstat check ERROR: 
exitCode: "+exitValue+" cmd=" + cmdLine.toString()); 146 | } 147 | 148 | String netstatOutput = stdOut.toString(); 149 | Pattern netstatPattern = Pattern.compile("443\\s+ESTABLISHED"); 150 | Matcher netstatMatcher = netstatPattern.matcher(netstatOutput); 151 | int total = 0; 152 | while (netstatMatcher.find()) { 153 | total += 1; 154 | } 155 | 156 | logger.trace("Latest total of Netstat outgoing HTTPS connections = " + total); 157 | this.latestHTTPSConnTotal = total; 158 | 159 | } catch(Exception e) { 160 | logger.error("Unexpected error netstating current HTTPS conns: " + this.pathToLogFile + " " + e.getMessage(),e); 161 | } 162 | 163 | } catch(Exception e) { 164 | logger.error("Unexpected error: " + this.pathToLogFile + " " + e.getMessage(),e); 165 | } 166 | } 167 | } 168 | 169 | public int getS3UploadQueueSize() { 170 | if (this.latestLogTail != null) { 171 | Pattern s3QueueSizePatten = Pattern.compile(".+s3_queue: \\d+, \\d+, \\d+, \\d+, \\d+, (\\d+).*"); 172 | Matcher m = s3QueueSizePatten.matcher(this.latestLogTail); 173 | int lastMatch = -1; 174 | 175 | while (m.find()) { 176 | lastMatch = Integer.valueOf(m.group(1).trim()); 177 | } 178 | 179 | return lastMatch; 180 | } 181 | 182 | return -1; 183 | } 184 | 185 | public boolean writesShouldBackoff() { 186 | 187 | int currentMultipartUploads = this.getCurrentMultipartUploads(); 188 | int currentS3UploadSize = this.getS3UploadQueueSize(); 189 | logger.debug("Latest Yas3fs s3_queue size = " + currentS3UploadSize); 190 | logger.debug("Latest Netstat outgoing HTTPS connections = " + latestHTTPSConnTotal); 191 | logger.debug("Latest Yas3fs multipart uploads = " + currentMultipartUploads); 192 | 193 | if (currentMultipartUploads >= this.backoffWhenMultipartUploads) { 194 | logger.debug("writesShouldBackoff() currentMultipartUploads=" + currentMultipartUploads + 195 | " and backoffWhenMultipartUploads=" + this.backoffWhenMultipartUploads); 196 | return true; 197 | } 198 | 199 | 200 | if (this.latestHTTPSConnTotal >= 
this.backoffWhenTotalHTTPSConns) { 201 | logger.debug("writesShouldBackoff() latestHTTPSConnTotal=" + latestHTTPSConnTotal + 202 | " and backoffWhenTotalHTTPSConns=" + this.backoffWhenTotalHTTPSConns); 203 | return true; 204 | } 205 | 206 | 207 | if (currentS3UploadSize >= this.backoffWhenTotalS3Uploads) { 208 | logger.debug("writesShouldBackoff() currentS3UploadSize=" + currentS3UploadSize + 209 | " and backoffWhenTotalS3Uploads=" + this.backoffWhenTotalS3Uploads); 210 | return true; 211 | } 212 | 213 | return false; 214 | } 215 | 216 | public boolean writesAreComplete() { 217 | // get the latest s3upload queue size 218 | int s3UploadQueueSize = this.getS3UploadQueueSize(); 219 | 220 | // add it to our list (most recent -> oldest) 221 | this.s3UploadCounts.push(s3UploadQueueSize); 222 | 223 | int count = -1; 224 | 225 | // if we have enough upload count history... 226 | if (this.s3UploadCounts.size() > this.isIdleWhenNZeroUploads) { 227 | 228 | // clone it 229 | Stack toScan = (Stack)this.s3UploadCounts.clone(); 230 | 231 | // look through N past upload counts we have checked 232 | // and add them all up... (the stack is a LIFO stack) 233 | // so most recent -> oldest 234 | count = 0; // init to zero.... 
235 | for (int i=0; i 0) { 377 | logger.error("Netstat check ERROR: exitCode: "+exitValue+" cmd=" + cmdLine.toString()); 378 | } 379 | 380 | String netstatOutput = stdOut.toString(); 381 | Pattern netstatPattern = Pattern.compile("443\\s+ESTABLISHED"); 382 | Matcher netstatMatcher = netstatPattern.matcher(netstatOutput); 383 | int total = 0; 384 | while (netstatMatcher.find()) { 385 | total += 1; 386 | } 387 | 388 | System.out.println(total); 389 | */ 390 | } 391 | 392 | 393 | public Integer getBackoffWhenTotalHTTPSConns() { 394 | return backoffWhenTotalHTTPSConns; 395 | } 396 | 397 | 398 | public void setBackoffWhenTotalHTTPSConns(Integer backoffWhenTotalSSLConns) { 399 | this.backoffWhenTotalHTTPSConns = backoffWhenTotalSSLConns; 400 | } 401 | 402 | 403 | public Integer getBackoffWhenMultipartUploads() { 404 | return backoffWhenMultipartUploads; 405 | } 406 | 407 | 408 | public void setBackoffWhenMultipartUploads(Integer backoffWhenMultipartUploads) { 409 | this.backoffWhenMultipartUploads = backoffWhenMultipartUploads; 410 | } 411 | 412 | 413 | 414 | } 415 | -------------------------------------------------------------------------------- /src/main/resources/ec2-init-s3BucketLoader.sample.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import boto 4 | import subprocess 5 | import boto.s3.bucket 6 | import boto.s3.connection 7 | import signal 8 | import sys 9 | import time 10 | 11 | ################################## 12 | # This script is A SAMPLE ONLY 13 | # Its intended to be passed as the userData attribute 14 | # when the master node launches ec2 worker node instances 15 | # 16 | # Its up to you to do whatever you want to get your 17 | # worker nodes prepped and ready for work. 18 | # 19 | # This can be read in by the MASTER node via 20 | # the 'master.workers.ec2.userDataFile' property 21 | # when it launches ec2 nodes. 
22 | # 23 | # For example: Here is where you can prepare 24 | # your launched ec2 worker node 25 | # to install and configure 26 | # 27 | # - fusepy 28 | # - yas3fs 29 | # - local dirs 30 | # - mount the shared NFS source dir 31 | # - set permissions 32 | # - get the latest worker.properties 33 | # - get the latest s3BucketLoader JAR 34 | # - whatever else you want 35 | # - start the worker! 36 | # 37 | # Note this sample references custom 38 | # fusepy/yas3fs RPMs, however you would 39 | # would have to create your own or change 40 | # customize this routine to install them 41 | # via a different way. 42 | # 43 | ################################## 44 | 45 | 46 | access_key = 'YOUR_KEY' 47 | secret_key = 'YOUR_KEY' 48 | 49 | s3BktLdrBucketName='nameOf.S3Bucket.2DownloadInstall.ResourcesFrom' 50 | 51 | s3MountRoot='/mydir/s3mount' 52 | s3BktLdrInstallRoot='/mydir/s3BucketLoader' 53 | 54 | s3BktLdrJar='s3-bucket-loader-0.0.1-SNAPSHOT.jar' 55 | s3BktLdrProps='s3BucketLoader.worker.properties' 56 | 57 | fusepyRPM='python-fusepy-version.rpm' 58 | yas3fsRPM='python-yas3fs-version.rpm' 59 | 60 | nfsServerPath='your.nfs.server.com:/exported/path' 61 | nfsLocalMountRoot='/mydir/nfs' 62 | 63 | # having local users/groups created 64 | # is important so that when the worker 65 | # copies files via rsync from the 'source' 66 | # share, that the uid/gids are preserved 67 | # by yas3fs when written to the s3 bucket 68 | # so that ultimately whatever application 69 | # you intend to mount the new bucket will 70 | # be able to run as its intended uid/gid 71 | # and access the files! 72 | username = 'whatever' 73 | uid = '400' 74 | groupname = 'whatever' 75 | gid = '400' 76 | 77 | ################################## 78 | # END 79 | ################################## 80 | 81 | def signal_handler(signal, frame): 82 | print 'You pressed Ctrl+C! sleeping to let s3BucketLoader cleanup...' 
83 | time.sleep(30000) 84 | sys.exit(0) 85 | 86 | signal.signal(signal.SIGINT, signal_handler) 87 | 88 | conn = boto.connect_s3( 89 | aws_access_key_id = access_key, 90 | aws_secret_access_key = secret_key, 91 | host = 's3.amazonaws.com', 92 | calling_format = boto.s3.connection.OrdinaryCallingFormat(), 93 | ) 94 | 95 | bucket = conn.get_bucket(s3BktLdrBucketName) 96 | 97 | # user/group setup for perms 98 | subprocess.call(['groupadd', '-g', gid, groupname]) 99 | subprocess.call(['useradd', '-M', '-u', uid, '-g', gid, groupname]) 100 | 101 | # setup dirs 102 | subprocess.call(['mkdir', '-p', nfsLocalMountRoot]) 103 | subprocess.call(['mkdir', '-p', s3MountRoot]) 104 | subprocess.call(['mkdir', '-p', s3BktLdrInstallRoot]) 105 | 106 | # pull down software 107 | key = bucket.get_key(s3BktLdrJar) 108 | key.get_contents_to_filename(s3BktLdrInstallRoot+'/'+s3BktLdrJar) 109 | 110 | key = bucket.get_key(s3BktLdrProps) 111 | key.get_contents_to_filename(s3BktLdrInstallRoot+'/'+s3BktLdrProps) 112 | 113 | key = bucket.get_key(fusepyRPM) 114 | key.get_contents_to_filename(s3BktLdrInstallRoot+'/'+fusepyRPM) 115 | 116 | key = bucket.get_key(yas3fsRPM) 117 | key.get_contents_to_filename(s3BktLdrInstallRoot+'/'+yas3fsRPM) 118 | 119 | # perms 120 | subprocess.call(['chown', '-R', ('root:'+groupname), s3BktLdrInstallRoot]) 121 | subprocess.call(['chmod', '-R', '770', s3BktLdrInstallRoot]) 122 | 123 | # update fuse yas3fs, prep, install and configure so that it's runnable 124 | # by the worker, for example, via the 'worker.initialize.cmd' property in the 125 | # s3BucketLoader.properties file the worker would use 126 | subprocess.call(['rpm', '-ivh', 'https://dl.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm']) 127 | subprocess.call(['yum', '-y', '--enablerepo=epel', 'install', 'python-argparse']) 128 | subprocess.call(['yum', '-y','--enablerepo=epel', 'localinstall', (s3BktLdrInstallRoot+'/'+fusepyRPM)]) 129 | subprocess.call(['yum', '-y','--enablerepo=epel', 
'localinstall', (s3BktLdrInstallRoot+'/'+yas3fsRPM)]) 130 | subprocess.call(['mkdir', '-p', '/var/lib/yas3fs/s3BucketLoader']) 131 | subprocess.call(['chown', '-R', 'root:yas3fs', '/var/lib/yas3fs/s3BucketLoader']) 132 | subprocess.call(['chmod', '-R', '770', '/var/lib/yas3fs/s3BucketLoader']) 133 | subprocess.call(['usermod', '-a', '-G', groupname, 'yas3fs']) 134 | subprocess.call(['chkconfig', 'yas3fs', 'off', '0,1,2,3,4,5,6']) 135 | 136 | subprocess.call(['yum', '-y', 'install', 'nfs-utils', 'nfs-utils-lib']) 137 | subprocess.call(['mount', nfsServerPath, nfsLocalMountRoot]) 138 | 139 | text_file = open("/etc/fuse.conf", "w") 140 | text_file.write("user_allow_other") 141 | text_file.close() 142 | 143 | # launch S3BucketLoader which will start up in listening mode waiting for master ..... 144 | subprocess.call(['java', '-DisMaster=false', ('-DconfigFilePath='+s3BktLdrInstallRoot+'/'+s3BktLdrProps), ('-Ds3BucketLoaderHome='+s3BktLdrInstallRoot), '-jar', (s3BktLdrInstallRoot+'/'+s3BktLdrJar)]) 145 | 146 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, FILE, s3BucketLoaderAppender 2 | 3 | log4j.logger.org.bitsofinfo=DEBUG 4 | 5 | log4j.appender.s3BucketLoaderAppender=org.apache.log4j.ConsoleAppender 6 | log4j.appender.s3BucketLoaderAppender.layout=org.apache.log4j.PatternLayout 7 | log4j.appender.s3BucketLoaderAppender.layout.ConversionPattern=%d{yyyy-MM-dd HH:MM:ss} %p %C{1} [%L] %M() - %m %n 8 | 9 | log4j.appender.FILE=org.apache.log4j.FileAppender 10 | log4j.appender.FILE.File=${s3BucketLoaderHome}/s3BucketLoader.log 11 | log4j.appender.FILE.ImmediateFlush=true 12 | log4j.appender.FILE.Threshold=debug 13 | log4j.appender.FILE.Append=false 14 | log4j.appender.FILE.layout=org.apache.log4j.PatternLayout 15 | log4j.appender.FILE.layout.conversionPattern=%d{yyyy-MM-dd HH:MM:ss} %p %C{1} [%L] 
%M() - %m %n 16 | -------------------------------------------------------------------------------- /src/main/resources/s3BucketLoader.sample.properties: -------------------------------------------------------------------------------- 1 | # ==================================== 2 | # ==================================== 3 | # 4 | # ----------------------------- 5 | # S3BucketLoader config file 6 | # ----------------------------- 7 | # 8 | # This sample config file specifies 9 | # the configuration options for the two 10 | # modes the loader runs as (master/worker) 11 | # 12 | # The master communicates w/ workers via 13 | # a SNS control-channel, and publishes 14 | # the TOC (table of contents) via an 15 | # SQS queue. 16 | # 17 | # ==================================== 18 | # ==================================== 19 | 20 | 21 | # name prefix for the 'control channel' SNS topic 22 | # that will be created to coordinate workers 23 | aws.sns.control.topic.name=s3BucketLoaderControlChannel 24 | 25 | # name of the SQS 'table of contents' queue 26 | # that all workers consume from 27 | aws.sqs.queue.name=s3BucketLoaderTOCQueue 28 | 29 | # AWS creds to manage the above resources 30 | # as well as communicate to the S3 bucket(s) 31 | # You will need to tweak this user's IAM 32 | # policies to permit appropriate SNS/SQS/S3 access 33 | aws.access.key=YOUR_ACCESS_KEY 34 | aws.secret.key=YOUR_SECRET_KEY 35 | aws.account.principal.id=121212121221 36 | aws.user.arn=arn:aws:iam::121212121221:user/your.s3bucketLoader.username 37 | 38 | 39 | #################################### 40 | #################################### 41 | # 42 | # MASTER mode configuration 43 | # 44 | # - will only be consumed 45 | # if -DisMaster=true is set 46 | # 47 | ##################################### 48 | ##################################### 49 | 50 | # OPTIONAL: if the master should auto-shutdown itself 51 | # after N minutes after the entire process is complete 52 | # (equivlent to you manually doing a ^C 
on the app 53 | master.auto.shutdown.after.n.minutes=30 54 | 55 | # OPTIONAL: If you want the master to control (start | stop) 56 | # an ec2 instance identified by 'instanceId' which contains 57 | # the 'source' data that workers/master will access for TOC ops 58 | master.source.host.ec2.instanceId=i-xxxxxxx 59 | master.source.host.ec2.stopOnMasterShutdown=false 60 | master.source.host.post.start.cmd=mount 1.2.3.4:/exported/nfs /opt/nfs/toc_source 61 | master.source.host.pre.stop.cmd=umount /opt/nfs/toc_source 62 | 63 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 64 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 65 | # SourceTOCGenerator - the class used to generate 66 | # the 'table of contents', there are several different 67 | # ones you can use, you must configure ONLY ONE 68 | # and comment out the others! 69 | # 70 | # - DirectoryCrawler 71 | # - builds the TOC by crawling the a source 72 | # directory (recursively) for all files 73 | # 74 | # - TOCManifestBasedGenerator 75 | # - determines the files to scan in the source 76 | # directory based off a simple TOC manifest 77 | # rather than crawling an entire tree 78 | # 79 | # - S3BucketObjectLister 80 | # - determines the TOC by listing all keys 81 | # in a given S3 bucket (this assumes the 82 | # keys in the bucket follow a "filesystem" like 83 | # naming convention dir/ or dir/file etc. 84 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 85 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 86 | 87 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 88 | # SourceTOCGenerator = DIRECTORY CRAWLER 89 | # Will scan the 'source.dir' configured below 90 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 91 | tocGenerator.class=org.bitsofinfo.s3.toc.DirectoryCrawler 92 | tocGenerator.source.dir=/opt/nfs/toc_source 93 | 94 | # OPTIONAL: if set, will skip all files who's modified at 95 | # timestamp is OLDER than this date... 
96 | tocGenerator.lastModifiedAtGreaterThanFilter=2014-10-22 97 | 98 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | # SourceTOCGenerator = MANIFEST DRIVEN CRAWLER 100 | # Will scan the 'source.dir' configured below 101 | # based off the TOC/manifest located in the 102 | # manifest file 103 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 104 | 105 | # will scan the 'source.dir' configured below 106 | # using the manifest file which lists directory paths 107 | # of files that should be sent to the TOC queue 108 | # where the paths in the manifest are /relative/from/toc_source_dir 109 | tocGenerator.class=org.bitsofinfo.s3.toc.TOCManifestBasedGenerator 110 | tocGenerator.toc.manifest.file=/opt/nfs/toc_manifest.txt 111 | tocGenerator.source.dir=/opt/nfs/toc_source 112 | 113 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 114 | # SourceTOCGenerator = S3 BUCKET OBJECT LISTER 115 | # Will generate a TOC based off of all keys 116 | # found in the given bucket 117 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 118 | 119 | # which will scan the S3 'bucketName' configured below 120 | # to generate a TOC of paths relative/from/bucket-root 121 | tocGenerator.class=org.bitsofinfo.s3.toc.S3BucketObjectLister 122 | tocGenerator.source.s3.bucketName=source-bucket-name 123 | 124 | 125 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 126 | # MASTER WORKER CONTROL 127 | # 128 | # Total number of workers we expect to be running 129 | # and consuming the toc tasks we create. The higher 130 | # this number the faster it all works... you just 131 | # need to provide and launch the workers... which 132 | # if you turn 'master.workers.ec2.managed=true' on 133 | # will be much more seamless (and cost you $$) 134 | # NOTE: the master will not start publishing the TOC 135 | # until this number of workers have reported as 136 | # initialized. 
(if using ec2, the master has logic 137 | # to detect and auto-terminate suspect workers that 138 | # have yet to report in, to keep things moving) 139 | master.workers.total=4 140 | 141 | # OPTIONAL: log uploading; if configured, 142 | # on SHUTDOWN, the master 143 | # will instruct all workers to upload the 144 | # named files to the given bucket at a location 145 | # as follows 146 | # 147 | # Workers: BUCKET_NAME/[master-generated-name]/[workerIP]/ 148 | # Master: BUCKET_NAME/[master-generated-name]/master/ 149 | master.s3.log.bucket=s3-bkt-ldr-logs 150 | master.s3.log.worker.files=/opt/s3BucketLoader/s3BucketLoader.log, /var/log/yas3fs/yas3fs.log 151 | master.s3.log.master.files=/opt/s3BucketLoader/s3BucketLoader.log, /some/dir/worker_error_reports.json 152 | 153 | # If ERROR report mode is triggered 154 | # specify the full path to the logfile 155 | # on the master where the JSON of error reports will be logged 156 | master.workers.error.report.logfile=/some/dir/worker_error_reports.json 157 | 158 | # number of threads which consume 159 | # the TOC entries the TOC generator creates and 160 | # dispatch them to the SQS TOC queue. 161 | master.tocqueue.dispatch.threads=8 162 | 163 | # Workers send periodic 'current' summary 164 | # messages over the control channel which contain 165 | # stats on the number of successes/fails for both 166 | # WRITE and VALIDATE modes, if any of these 167 | # CURRENT_SUMMARY contain failures this setting 168 | # controls if the master will stop the writes/validations 169 | # currently in progress and immediately switch 170 | # to REPORT_ERRORS mode.... If this is false 171 | # master will only go into ERROR_REPORT mode 172 | # when workers are complete and send their FINISHED_SUMMARY 173 | master.failfast.on.worker.current.summary.error=true 174 | 175 | # OPTIONAL, this will use workers.total to spin up ec2 instances 176 | # otherwise you are responsible for setting up workers 177 | # and getting them ready. 
If you use this it can cost 178 | # you $$ and you will want to use the userDataFile 179 | # contents to automate the 'setup' of your worker nodes 180 | # @see ec2 documentation for how to automate the setup 181 | # of ec2 nodes on startup with a custom 'user-data' file/script 182 | master.workers.ec2.managed=false 183 | master.workers.ec2.minutes.to.wait.for.worker.init=10 184 | master.workers.ec2.ami.id=ami-08842d60 185 | master.workers.ec2.instanceType=t2.micro 186 | master.workers.ec2.disk.deviceName=/dev/xvda 187 | master.workers.ec2.disk.volumeType=Standard 188 | master.workers.ec2.disk.size.gigabytes=30 189 | master.workers.ec2.keyName=myKey 190 | master.workers.ec2.securityGroupId=sg-3a8d065f 191 | master.workers.ec2.subnetId=subnet-80d1f3a8 192 | master.workers.ec2.shutdownBehavior=Terminate 193 | master.workers.ec2.userDataFile=/path/to/ec2-init-s3BucketLoader.py 194 | 195 | 196 | 197 | 198 | #################################### 199 | #################################### 200 | # 201 | # WORKER mode configuration 202 | # 203 | # - will only be consumed 204 | # if -DisMaster=false is set 205 | # 206 | ##################################### 207 | ##################################### 208 | 209 | # Total number of SQS TOC queue 210 | # consumer threads that will run 211 | # on each worker node. You will want 212 | # to tweak this based on the number of 213 | # cores your worker boxes have. Also 214 | # consider that if you are ultimately writing 215 | # through yas3fs, you have to account for 216 | # the threads yas3fs can potentially need 217 | # as well. 
218 | worker.toc.consumer.threads.num=4 219 | 220 | # The minimum number of TOC message requests 221 | # a worker TOC queue consumer thread must make before 222 | # it can possibly be a candidate for idle evaluation 223 | # You will want to set this to an appropriate number 224 | # to ensure that all consumer threads have at least a chance 225 | # to receive messages and to avoid false positive premature 226 | # "idle" declaration in cases where the master takes a while 227 | # to start sending TOC messages onto the TOC queue. 228 | worker.toc.consumer.threads.min.requests.before.idle=20 229 | 230 | # OPTIONAL: Worker initialize command 231 | # 232 | # This will be run before the worker 233 | # reports itself as INITIALIZED 234 | # in this example we fire up yas3fs 235 | # on the node to mount the target S3 236 | # bucket that the worker node(s) will 237 | # write to. This obviously assumes that your 238 | # worker has the software required to run 239 | # the commands below....@see 'master.workers.ec2.userDataFile' 240 | worker.initialize.cmd=/path/to/yas3fs s3://BUCKET-NAME /opt/s3BucketLoader -l /path/to/yas3fs.log -d --st-blksize 131072 --read-retries-num 10 --read-retries-sleep 1 --download-retries-num 20 --download-retries-sleep 5 --recheck-s3 --cache-path /path/to/yas3fs/cache/s3BucketLoader --cache-on-disk 0 --cache-disk-size 30000 --with-plugin-class RecoverYas3fsPlugin --aws-managed-encryption --log-backup-count 20 --log-backup-gzip --log-mb-size 100 241 | worker.initialize.cmd.env=AWS_ACCESS_KEY_ID=YOUR_ACCESS_KEY,AWS_SECRET_ACCESS_KEY=YOUR_SECRET_KEY 242 | worker.destroy.cmd=fusermount -u /opt/s3BucketLoader 243 | 244 | 245 | # OPTIONAL: worker pre-VALIDATE mode commands 246 | # Note, the delimiter for each command is semi-colin ";" 247 | # These commands will be run as soon as VALIDATE mode is started. 
248 | # Note here you can use %var% syntax to reference a pre-existing 249 | # 'worker.initialize.cmd.env' and 'worker.initialize.cmd' 250 | # if they were previously defined above in the init cmd section 251 | worker.pre.validate.cmd.env=%worker.initialize.cmd.env% 252 | worker.pre.validate.cmd=fusermount -u /opt/s3BucketLoader; rm -rf /yas3fs/cache/s3BucketLoader/*; %worker.initialize.cmd% 253 | 254 | # OPTIONAL: a 'writeBackoffMonitor' which monitors 255 | # yas3fs to determine when to backoff based on 256 | # checking the s3_queue number, if it gets to a certain 257 | # point all TOCQueue consuming threads will pause until 258 | # s3_queue drops (yas3fs writes in background and we don't 259 | # want to overload it). This also has the option via the 260 | # backoffWhenTotalHTTPSConns to backoff when the total # 261 | # of outgoing HTTPS conns is >= the configured number 262 | # Additionally it can back off if the total yas3fs 263 | # multipart-uploads are >= N 264 | # 265 | # IMPORTANT the value for backoffWhenTotalHTTPSConns should take 266 | # into account the worker.toc.consumer.threads.num value and 267 | # factor that into the potential number of HTTPs connection 268 | # (i.e. the TOCQueue consumer threads have HTTPs conns to SQS!) 
269 | worker.write.backoff.monitor.class=org.bitsofinfo.s3.yas3fs.Yas3fsS3UploadMonitor 270 | worker.write.backoff.monitor.yas3fs.backoffWhenMultipartUploads=2 271 | worker.write.backoff.monitor.yas3fs.backoffWhenTotalHTTPSConns=20 272 | worker.write.backoff.monitor.yas3fs.backoffWhenTotalS3Uploads=10 273 | worker.write.backoff.monitor.yas3fs.checkEveryMS=30000 274 | worker.write.backoff.monitor.yas3fs.logFilePath=/path/to/yas3fs.log 275 | 276 | # OPTIONAL: a 'writeErrorMonitor' which monitors 277 | # yas3fs log file for ERROR entries which occur in the background 278 | # asynchronously and will represent errors uploading to S3, AFTER 279 | # a file may have 'successfully' been written locally by the 280 | # 'tocPayloadHandler.write.class' below... 281 | worker.write.error.monitor.class=org.bitsofinfo.s3.yas3fs.Yas3fsS3UploadMonitor 282 | worker.write.error.monitor.yas3fs.checkEveryMS=30000 283 | worker.write.error.monitor.yas3fs.logFilePath=/path/to/yas3fs.log 284 | 285 | # OPTIONAL: a 'writeMonitor' which monitors 286 | # yas3fs to determine when it is really complete 287 | # with all background uploads (checks s3_queue status 288 | # for N number of times for it being consistently zero (0) 289 | # All rsyncs can finish, yet yas3fs could be uploading in 290 | # the background... 291 | worker.write.complete.monitor.class=org.bitsofinfo.s3.yas3fs.Yas3fsS3UploadMonitor 292 | worker.write.complete.monitor.yas3fs.checkEveryMS=30000 293 | worker.write.complete.monitor.yas3fs.logFilePath=/path/to/yas3fs.log 294 | 295 | 296 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 297 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 298 | # TOCPayloadHandler 'WRITE' mode handlers 299 | # 300 | # The class used to handle inbound TOCPayload's [write mode] 301 | # that a Worker receives from the SQS TOCQueue; you must 302 | # configure ONLY ONE and comment out the others! 
303 | # 304 | # - FileCopyTOCPayloadHandler 305 | # - assumes inbound TOC paths represent 306 | # a path locally accessible on the file-system 307 | # from the worker, and attempts to recreate 308 | # this path against a target directory, including 309 | # creating directories, copying the file (rsync/cp) 310 | # and optionally setting permissions 311 | # 312 | # - S3KeyCopyingTOCPayloadHandler 313 | # - Interprets the TOC payload as a S3 object 314 | # key and issues a key-copy request for the 315 | # object from the source s3 bucket to the 316 | # target s3 bucket 317 | # 318 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 319 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 320 | 321 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 322 | # TOCPayloadHandler [WRITE MODE] = FILE COPY HANDLER 323 | # Runs a mkdir + [rsync | cp] + (chown & chmod) 324 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 325 | 326 | # FileCopyTOCPayloadHandler does (mkdir -p + (rsync | cp) + (optional chown/chmod)) 327 | tocPayloadHandler.write.class=org.bitsofinfo.s3.toc.FileCopyTOCPayloadHandler 328 | 329 | # number of retries for each FileCopy operation (mkdir, rsync | cp) etc 330 | tocPayloadHandler.write.retries=3 331 | tocPayloadHandler.write.retries.sleep.ms=5000 332 | 333 | # if set to FALSE, will just exec a standard "cp" 334 | tocPayloadHandler.write.use.rsync=true 335 | 336 | # rsync options, note this will be split on spaces 337 | # when it's time to parse it 338 | tocPayloadHandler.write.rsync.options=--inplace -avz 339 | 340 | # OPTIONAL regex for permissible rsync errors that will be ignored and not reported 341 | # for example in this case the TOC source where paths are generated may be newer 342 | # than the copy the workers have, so we don't expect them all to work. 
343 | tocPayloadHandler.write.rsync.tolerable.error.regex=(?s)(.*change_dir.*\\/opt\\/nfs.*No such file or directory.*) 344 | 345 | # OPTIONAL: if this property is set, the FileCopyTOCPayloadHandler 346 | # (immediately after a successful local write to the target dir) 347 | # will attempt to validate the file exists locally and its size matches 348 | # up w/ what the TOC states by checking locally at the given path below 349 | # the result of this check will be written to the specified log file 350 | # NOTE the success or failure of this particular 'pre-validate' check 351 | # has no effect on the overall success/failure of the WRITE operation 352 | # NOTE: the log file will only contain validation FAILURES... 353 | tocPayloadHandler.write.post.success.validate.local.dir=/path/to/yas3fs/cache/root/files 354 | tocPayloadHandler.write.post.success.validate.logfile=/opt/s3BucketLoader/yas3fs-post-write-cache-validate.log 355 | tocPayloadHandler.write.post.success.validate.skipDirectories=true 356 | 357 | # OPTIONAL for FileCopyTOCPayloadHandler 358 | # these will be executed after the copy 359 | # of each file path, and configurable if to 360 | # be applied to directories ONLY or both files/dirs 361 | tocPayloadHandler.write.chmod.dirsOnly=true 362 | tocPayloadHandler.write.chmod=775 363 | tocPayloadHandler.write.chown.dirsOnly=true 364 | tocPayloadHandler.write.chown=500:500 365 | 366 | # This dir should have access to the shared copy of 367 | # source data that the TOC was generated from 368 | # This MUST be set if the tocPayloadHandler = FileCopyTOCPayloadHandler 369 | # and if the ValidatingTOCPayloadHandler is configured for any 370 | # of the local validate options 371 | tocPayloadHandler.source.dir.root=/opt/nfs/toc_source 372 | 373 | # This dir should be the 'target' dir of where 374 | # the files will be copied to (i.e. 
this would be the yas3fs s3 mount root) 375 | # This MUST be set if the tocPayloadHandler = FileCopyTOCPayloadHandler 376 | # and if the ValidatingTOCPayloadHandler is configured for any 377 | # of the local validate options 378 | tocPayloadHandler.target.dir.root=/opt/s3BucketLoader 379 | 380 | 381 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 382 | # TOCPayloadHandler [WRITE MODE] = S3 KEY COPY HANDLER 383 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 384 | 385 | # TOCPayloadHandler - S3KeyCopyingTOCPayloadHandler 386 | # which executes S3 key copies for every TOC entry received 387 | tocPayloadHandler.write.class=org.bitsofinfo.s3.toc.S3KeyCopyingTOCPayloadHandler 388 | tocPayloadHandler.write.s3keyCopy.sourceS3BucketName=source-s3-bucket-name 389 | tocPayloadHandler.write.s3keyCopy.targetS3BucketName=target-s3-bucket-name 390 | tocPayloadHandler.write.s3keyCopy.storageClass=ReducedRedundancy 391 | tocPayloadHandler.write.s3keyCopy.enableServerSideEncryption=true 392 | 393 | 394 | 395 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 396 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 397 | # TOCPayloadHandler 'VALIDATE' mode handlers 398 | # 399 | # The class used to handle inbound TOCPayload's [validate mode] 400 | # that a Worker receives from the SQS TOCQueue; you must 401 | # configure ONLY ONE and comment out the others! 402 | # 403 | # - ValidatingTOCPayloadHandler 404 | # - Can be used for validating FileCopyTOCPayloadHandler 405 | # based writes, OR S3KeyCopyingTOCPayloadHandler based 406 | # writes. 
407 | # 408 | # 409 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 410 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 411 | 412 | # for VALIDATE mode, this one by default checks file existence AND size match up 413 | tocPayloadHandler.validate.class=org.bitsofinfo.s3.toc.ValidatingTOCPayloadHandler 414 | 415 | # for VALIDATE mode 416 | # - validateEverywhere = will check both local FS + S3 object meta-data (only valid in conjunction w/ FileCopyTOCPayloadHandler) 417 | # - validateLocallyOnly = will check ONLY local FS (only valid in conjunction w/ FileCopyTOCPayloadHandler) 418 | # - validateS3Only = will check ONLY s3 (valid w/ either FileCopyTOCPayloadHandler OR S3KeyCopyingTOCPayloadHandler) 419 | # - validateLocallyThenS3OnFailure = will check local FS, if that fails, then check on S3 (only valid in conjunction w/ FileCopyTOCPayloadHandler) 420 | tocPayloadHandler.validate.mode=validateLocallyThenS3OnFailure 421 | tocPayloadHandler.validate.s3.bucketName=bucketNameToValidateIfS3ModeEnabled 422 | 423 | 424 | 425 | 426 | 427 | --------------------------------------------------------------------------------