├── .gitignore
├── LICENSE
├── README.md
├── build.gradle
├── gradle
│   └── wrapper
│       ├── gradle-wrapper.jar
│       └── gradle-wrapper.properties
├── gradlew
├── hdfs-mesos.sh
├── lib
│   ├── mesos-0.28.1-patched.jar
│   └── util-mesos-0.1.0.0.jar
├── src
│   ├── java
│   │   └── net
│   │       └── elodina
│   │           └── mesos
│   │               └── hdfs
│   │                   ├── Cli.java
│   │                   ├── Executor.java
│   │                   ├── HdfsProcess.java
│   │                   ├── HttpServer.java
│   │                   ├── Node.java
│   │                   ├── NodeCli.java
│   │                   ├── Nodes.java
│   │                   ├── Scheduler.java
│   │                   ├── SchedulerCli.java
│   │                   └── Storage.java
│   └── test
│       └── net
│           └── elodina
│               └── mesos
│                   └── hdfs
│                       ├── CliTest.java
│                       ├── HdfsMesosTestCase.java
│                       ├── HttpServerTest.java
│                       ├── NodeTest.java
│                       ├── NodesTest.java
│                       └── SchedulerTest.java
└── vagrant
    ├── README.md
    ├── Vagrantfile
    └── init.sh
/.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | todo.txt 4 | 5 | *.iml 6 | *.ipr 7 | *.iws 8 | .idea/ 9 | 10 | .gradle/ 11 | vagrant/.vagrant 12 | 13 | build/ 14 | tmp/ 15 | hdfs-mesos-*.jar 16 | hdfs-mesos.json 17 | hdfs-mesos.properties 18 | 19 | hadoop-*.tar.gz -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HDFS Mesos 2 | ========== 3 | * [Intro](#intro) 4 | * [Mesos in Vagrant](#mesos-in-vagrant) 5 | * [Running Scheduler](#running-scheduler) 6 | * [Running HDFS cluster](#running-hdfs-cluster) 7 | * [Using CLI](#using-cli) 8 | * [Using REST](#using-rest) 9 | * [Having Issue](#having-issue) 10 | 11 | 12 | Intro 13 | ----- 14 | This project allows running HDFS on Mesos. 
15 | 16 | You should be familiar with HDFS and Mesos basics: 17 | - http://mesos.apache.org/documentation/latest/ 18 | - https://hadoop.apache.org/docs/r2.7.2/hdfs_design.html 19 | 20 | Project requires: 21 | - Mesos 0.23.0+ 22 | - JDK 1.7.x 23 | - Hadoop 1.2.x or 2.7.x 24 | 25 | 26 | Mesos in Vagrant 27 | ---------------- 28 | The project includes a [vagrant environment](/vagrant) that allows running a Mesos cluster locally. 29 | 30 | If you are going to use an external Mesos cluster, you can skip this section. 31 | 32 | **1.** Start vagrant nodes: 33 | ``` 34 | # cd hdfs-mesos/vagrant 35 | # vagrant up 36 | ``` 37 | This creates Mesos master and slave nodes. 38 | 39 | **2.** Add [vagrant node names](vagrant/README.md#host-names) to `/etc/hosts` 40 | 41 | Now Mesos in vagrant should be running. You can proceed with starting the scheduler. 42 | 43 | For more details about the vagrant environment, please read [vagrant/README.md](vagrant/README.md) 44 | 45 | 46 | Running Scheduler 47 | ----------------- 48 | **1.** Download `hdfs-mesos*.jar` OR clone & build the project: 49 | 50 | Download jar: 51 | ``` 52 | # mkdir hdfs-mesos 53 | # cd hdfs-mesos 54 | # wget https://github.com/elodina/hdfs-mesos/releases/download/0.0.1.0/hdfs-mesos-0.0.1.0.jar 55 | ``` 56 | 57 | *OR* clone & build: 58 | ``` 59 | # git clone https://github.com/elodina/hdfs-mesos.git 60 | # cd hdfs-mesos 61 | # ./gradlew jar 62 | ``` 63 | 64 | **2.** Download the hadoop tarball: 65 | ``` 66 | # wget https://archive.apache.org/dist/hadoop/core/hadoop-2.7.2/hadoop-2.7.2.tar.gz 67 | ``` 68 | 69 | **3.** Start the scheduler: 70 | ``` 71 | # ./hdfs-mesos.sh scheduler --api=http://$scheduler:7000 --master=zk://$master:2181/mesos --user=vagrant 72 | 2016-03-18 15:04:48,785 [main] INFO hdfs.Scheduler - Starting Scheduler: 73 | api: http://$scheduler:7000 74 | files: jar:./hdfs-mesos-0.0.1.0.jar, hadoop:./hadoop-1.2.1.tar.gz 75 | mesos: master:master:5050, user:vagrant, principal:, secret: 76 | framework: name:hdfs, role:*, timeout:30d 77 | 2016-03-18 15:04:48,916 [main] INFO hdfs.HttpServer - started on port 7000 78 | I0318 15:04:49.008314 19123 sched.cpp:164] Version: 0.25.0 79 | I0318 15:04:49.017160 19155 sched.cpp:262] New master detected at master@192.168.3.5:5050 80 | I0318 15:04:49.019287 19155 sched.cpp:272] No credentials provided. Attempting to register without authentication 81 | I0318 15:04:49.029218 19155 sched.cpp:641] Framework registered with 20160310-141004-84125888-5050-10895-0006 82 | 2016-03-18 15:04:49,044 [Thread-17] INFO hdfs.Scheduler - [registered] framework:#-0006 master:#326bb pid:master@192.168.3.5:5050 hostname:master 83 | 2016-03-18 15:04:49,078 [Thread-18] INFO hdfs.Scheduler - [resourceOffers] 84 | slave0#-O761 cpus:1.00; mem:2500.00; disk:35164.00; ports:[5000..32000] 85 | master#-O762 cpus:1.00; mem:2500.00; disk:35164.00; ports:[5000..32000] 86 | ... 87 | 2016-03-18 15:04:49,078 [Thread-18] INFO hdfs.Scheduler - [resourceOffers] 88 | ``` 89 | where: 90 | - `$scheduler` is the scheduler address accessible from slave nodes; 91 | - `$master` is the master address accessible from the scheduler node; 92 | 93 | The scheduler should register itself and start receiving resource offers. 94 | If the scheduler is not receiving offers, you may need to set LIBPROCESS_IP: 95 | ``` 96 | # export LIBPROCESS_IP=$scheduler_ip 97 | ``` 98 | 99 | Now the scheduler should be running and you can proceed with starting HDFS nodes. 100 | 101 |
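The scheduler's embedded HTTP server (see `HttpServer.java`) also exposes a `/health` endpoint, which is a quick way to verify that the API address is reachable. A minimal check, assuming the scheduler was started with `--api=http://$scheduler:7000` as above:
```
# curl http://$scheduler:7000/health
ok
```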
102 | Running HDFS Cluster 103 | --------------------- 104 | The project provides a CLI & REST API for managing HDFS nodes. We will focus on the CLI first. 105 | 106 | **1.** Add namenode & datanode: 107 | ``` 108 | # ./hdfs-mesos.sh node add nn --type=namenode 109 | node added: 110 | id: nn 111 | type: namenode 112 | state: idle 113 | resources: cpus:0.5, mem:512 114 | 115 | # ./hdfs-mesos.sh node add dn0 --type=datanode 116 | node added: 117 | id: dn0 118 | type: datanode 119 | state: idle 120 | resources: cpus:0.5, mem:512 121 | ``` 122 | 123 | **2.** Start nodes: 124 | ``` 125 | # ./hdfs-mesos.sh node start \* 126 | nodes started: 127 | id: nn 128 | type: namenode 129 | state: running 130 | resources: cpus:0.5, mem:512 131 | reservation: cpus:0.5, mem:512, ports:http=5000,ipc=5001 132 | runtime: 133 | task: 383aaab9-982b-400e-aa35-463e66cdcb3b 134 | executor: 19065e07-a006-49a4-8f2b-636d8b1f2ad6 135 | slave: 241be3a2-39bc-417c-a967-82b4018a0762-S0 (master) 136 | 137 | id: dn0 138 | type: datanode 139 | state: running 140 | resources: cpus:0.5, mem:512 141 | reservation: cpus:0.5, mem:512, ports:http=5002,ipc=5003,data=5004 142 | runtime: 143 | task: 37f3bcbb-10a5-4323-96d2-aef8846aa281 144 | executor: 088463c9-5f2e-4d1d-8195-56427168b86f 145 | slave: 241be3a2-39bc-417c-a967-82b4018a0762-S0 (master) 146 | ``` 147 | 148 | Nodes are up & running now. 149 | 150 | Note: starting may take some time. You can view the progress via the Mesos UI. 151 | 152 | **3.** Do some FS operations: 153 | ``` 154 | # hadoop fs -mkdir hdfs://master:5001/dir 155 | # hadoop fs -ls hdfs://master:5001/ 156 | Found 1 items 157 | drwxr-xr-x - vagrant supergroup 0 2016-03-17 12:46 /dir 158 | ``` 159 | Note: the namenode host and IPC port are used in the FS URL. 160 | 161 |
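To verify the cluster end-to-end you can also round-trip a file. A minimal sketch using the standard `hadoop fs` commands against the same namenode host and IPC port as above:
```
# echo "hello hdfs" > /tmp/hello.txt
# hadoop fs -put /tmp/hello.txt hdfs://master:5001/dir/hello.txt
# hadoop fs -cat hdfs://master:5001/dir/hello.txt
hello hdfs
```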
162 | Using CLI 163 | --------- 164 | The project provides a CLI with the following structure: 165 | ``` 166 | # ./hdfs-mesos.sh help 167 | Usage: ... 168 | 169 | Commands: 170 | help [cmd [cmd]] - print general or command-specific help 171 | scheduler - start scheduler 172 | node - node management 173 | ``` 174 | 175 | Help is provided for each command and sub-command: 176 | ``` 177 | # ./hdfs-mesos.sh help node 178 | Node management commands 179 | Usage: node <cmd> 180 | 181 | Commands: 182 | list - list nodes 183 | add - add node 184 | update - update node 185 | start - start node 186 | stop - stop node 187 | remove - remove node 188 | 189 | Run `help node <cmd>` to see details of specific command 190 | 191 | # ./hdfs-mesos.sh help node add 192 | Add node 193 | Usage: node add <id> [options] 194 | 195 | Option (* = required) Description 196 | --------------------- ----------- 197 | --core-site-opts Hadoop core-site.xml options. 198 | --cpus CPU amount (0.5, 1, 2). 199 | --executor-jvm-opts Executor JVM options. 200 | --hadoop-jvm-opts Hadoop JVM options. 201 | --hdfs-site-opts Hadoop hdfs-site.xml options. 202 | --mem Mem amount in Mb. 203 | * --type node type (name_node, data_node). 204 | 205 | Generic Options 206 | Option Description 207 | ------ ----------- 208 | --api REST api url (same as --api option for 209 | scheduler). 210 | ``` 211 | 212 | All node-related commands support bulk operations using node-id expressions. 213 | Examples: 214 | ``` 215 | # ./hdfs-mesos.sh node add dn0..1 --type=datanode 216 | nodes added: 217 | id: dn0 218 | type: datanode 219 | ... 220 | 221 | id: dn1 222 | type: datanode 223 | ... 224 | 225 | # ./hdfs-mesos.sh node update dn* --cpus=1 226 | nodes updated: 227 | id: dn0 228 | ... 229 | resources: cpus:1.0, mem:512 230 | 231 | id: dn1 232 | ... 233 | resources: cpus:1.0, mem:512 234 | 235 | # ./hdfs-mesos.sh node start dn0,dn1 236 | nodes started: 237 | id: dn0 238 | ... 239 | 240 | id: dn1 241 | ... 242 | ``` 243 | 244 | Id expression examples: 245 | - `nn` - matches node with id nn 246 | - `*` - matches any node (should be backslash-escaped in shell) 247 | - `dn*` - matches nodes with ids starting with dn 248 | - `dn0..2` - matches nodes dn0, dn1, dn2 249 | 250 | 251 | Using REST 252 | ---------- 253 | The scheduler uses an embedded HTTP server. The server serves two functions: 254 | - distributing binaries of Hadoop, the JRE and the executor; 255 | - serving the REST API, invoked by the CLI; 256 | 257 | Most CLI commands map to a REST API call. Examples: 258 | 259 | | CLI command | REST call | 260 | |--------------------------------------------|---------------------------------------------| 261 | |`node add nn --type=namenode --cpus=2` |`/api/node/add?node=nn&type=namenode&cpus=2` | 262 | |`node start dn* --timeout=3m` |`/api/node/start?node=dn*&timeout=3m` | 263 | |`node remove dn5` |`/api/node/remove?node=dn5` | 264 | 265 | REST calls accept plain HTTP params and return JSON responses. 266 | Examples: 267 | ``` 268 | # curl http://$scheduler:7000/api/node/list 269 | [ 270 | { 271 | "id": "nn", 272 | "type": "namenode", 273 | ... 274 | }, 275 | { 276 | "id": "dn0", 277 | "type": "datanode", 278 | ... 279 | } 280 | ] 281 | 282 | # curl http://$scheduler:7000/api/node/start?node=nn,dn0 283 | { 284 | "status": "started", 285 | "nodes": [ 286 | { 287 | "id": "nn", 288 | "state": "running", 289 | ... 290 | }, 291 | { 292 | "id": "dn0", 293 | "state": "running", 294 | ... 295 | } 296 | ] 297 | } 298 | ``` 299 | 300 | CLI params map one-to-one to REST params. CLI params use dashed style 301 | while REST params use camelCase. Example mappings: 302 | 303 | | CLI param | REST param | 304 | |--------------------------------------------|---------------------------------------------| 305 | |`<id>` (node add\|update\|...) |`node` | 306 | |`timeout` (node start\|stop) |`timeout` | 307 | |`core-site-opts` (node add\|update) |`coreSiteOpts` | 308 | |`executor-jvm-opts` (node add\|update) |`executorJvmOpts` | 309 | 310 | A REST API call can return an error in some cases. 311 | Errors are marked with a status code other than 200. The error response is returned in JSON format. 312 | 313 | Example: 314 | ``` 315 | # curl -v http://192.168.3.1:7000/api/node/start?node=unknown 316 | ... 317 | HTTP/1.1 400 node not found 318 | ... 319 | {"error":"node not found","code":400} 320 | ``` 321 | 322 | For more details on the REST API, please refer to the sources. 323 | 324 |
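The same HTTP server also hosts the artifacts the scheduler distributes to executors (see `HttpServer.java`): the project jar under `/jar/`, the Hadoop tarball under `/hadoop/` and, if configured, a JRE under `/jre/`. A quick way to check that distribution works; the file names below are examples matching the download steps earlier and may differ in your setup:
```
# curl -O http://$scheduler:7000/jar/hdfs-mesos-0.0.1.0.jar
# curl -O http://$scheduler:7000/hadoop/hadoop-2.7.2.tar.gz
```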
325 | Having Issue 326 | ------------ 327 | Please read this README carefully to make sure your problem is not already described. 328 | 329 | Also make sure that your issue does not duplicate any existing one. 330 | 331 | **DO NOT** post general questions like "I am having a problem with Mesos" 332 | to the issue list. Please use generic QA sites like http://stackoverflow.com 333 | for that. 334 | 335 | General rules for posting issues are: 336 | - be precise: provide a minimal reproduction scenario; 337 | - provide details: provide all required log snippets (stdout/err from the scheduler or stdout/err from the executor's sandbox, mesos logs if required); 338 | - be helpful: provide a PR for the bug fix if possible; 339 | 340 | Issues list: https://github.com/elodina/hdfs-mesos/issues 341 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: 'idea' 2 | apply plugin: 'java' 3 | 4 | version = '0.1.0.0' 5 | 6 | jar.archiveName = "hdfs-mesos-${version}.jar" 7 | buildDir = 'out/gradle' 8 | libsDirName = '../../' 9 | 10 | repositories { 11 | mavenCentral() 12 | flatDir { dirs 'lib' } 13 | } 14 | 15 | sourceSets { 16 | main { 17 | java { srcDirs = ['src/java'] } 18 | } 19 | test { 20 | java { srcDirs = ['src/test'] } 21 | } 22 | } 23 | 24 | dependencies { 25 | compile name: "mesos-0.28.1-patched" 26 | compile name: "util-mesos-0.1.0.0" 27 | compile "com.google.protobuf:protobuf-java:2.5.0" 28 | compile 'com.googlecode.protobuf-java-format:protobuf-java-format:1.4' 29 | compile "org.eclipse.jetty:jetty-http:9.0.4.v20130625" 30 | compile "org.eclipse.jetty:jetty-io:9.0.4.v20130625" 31 | compile "org.eclipse.jetty:jetty-security:9.0.4.v20130625" 32 | compile "org.eclipse.jetty:jetty-server:9.0.4.v20130625" 33 | compile "org.eclipse.jetty:jetty-servlet:9.0.4.v20130625" 34 | compile "org.eclipse.jetty:jetty-util:9.0.4.v20130625" 35 | compile "javax.servlet:javax.servlet-api:3.0.1" 36 | compile "net.sf.jopt-simple:jopt-simple:4.8" 37 | compile 'com.googlecode.json-simple:json-simple:1.1.1' 38 | compile "log4j:log4j:1.2.16" 39 | compile "com.101tec:zkclient:0.6" 40 | testCompile 'junit:junit:4.12' 41 | } 42 | 43 | jar { 44 | dependsOn 'test' 45 | doFirst { 46 | from(configurations.compile.collect { it.isDirectory() ? it : zipTree(it) }) { 47 | exclude "*" 48 | exclude "about_files/*" 49 | exclude "META-INF/*.SF" 50 | exclude "META-INF/*.DSA" 51 | exclude "META-INF/*.RSA" 52 | } 53 | } 54 | manifest.attributes("Main-Class": "net.elodina.mesos.hdfs.Cli") 55 | } 56 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elodina/hdfs-mesos/7edc583e7d393c8ef8fa5f5aa58700b7f55e7701/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Mon Jan 18 17:40:18 EET 2016 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-2.0-all.zip 7 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Add default JVM options here.
You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 10 | DEFAULT_JVM_OPTS="" 11 | 12 | APP_NAME="Gradle" 13 | APP_BASE_NAME=`basename "$0"` 14 | 15 | # Use the maximum available, or set MAX_FD != -1 to use that value. 16 | MAX_FD="maximum" 17 | 18 | warn ( ) { 19 | echo "$*" 20 | } 21 | 22 | die ( ) { 23 | echo 24 | echo "$*" 25 | echo 26 | exit 1 27 | } 28 | 29 | # OS specific support (must be 'true' or 'false'). 30 | cygwin=false 31 | msys=false 32 | darwin=false 33 | case "`uname`" in 34 | CYGWIN* ) 35 | cygwin=true 36 | ;; 37 | Darwin* ) 38 | darwin=true 39 | ;; 40 | MINGW* ) 41 | msys=true 42 | ;; 43 | esac 44 | 45 | # For Cygwin, ensure paths are in UNIX format before anything is touched. 46 | if $cygwin ; then 47 | [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"` 48 | fi 49 | 50 | # Attempt to set APP_HOME 51 | # Resolve links: $0 may be a link 52 | PRG="$0" 53 | # Need this for relative symlinks. 54 | while [ -h "$PRG" ] ; do 55 | ls=`ls -ld "$PRG"` 56 | link=`expr "$ls" : '.*-> \(.*\)$'` 57 | if expr "$link" : '/.*' > /dev/null; then 58 | PRG="$link" 59 | else 60 | PRG=`dirname "$PRG"`"/$link" 61 | fi 62 | done 63 | SAVED="`pwd`" 64 | cd "`dirname \"$PRG\"`/" >&- 65 | APP_HOME="`pwd -P`" 66 | cd "$SAVED" >&- 67 | 68 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 69 | 70 | # Determine the Java command to use to start the JVM. 71 | if [ -n "$JAVA_HOME" ] ; then 72 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 73 | # IBM's JDK on AIX uses strange locations for the executables 74 | JAVACMD="$JAVA_HOME/jre/sh/java" 75 | else 76 | JAVACMD="$JAVA_HOME/bin/java" 77 | fi 78 | if [ ! -x "$JAVACMD" ] ; then 79 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 80 | 81 | Please set the JAVA_HOME variable in your environment to match the 82 | location of your Java installation." 83 | fi 84 | else 85 | JAVACMD="java" 86 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 87 | 88 | Please set the JAVA_HOME variable in your environment to match the 89 | location of your Java installation." 90 | fi 91 | 92 | # Increase the maximum file descriptors if we can. 93 | if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then 94 | MAX_FD_LIMIT=`ulimit -H -n` 95 | if [ $? -eq 0 ] ; then 96 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 97 | MAX_FD="$MAX_FD_LIMIT" 98 | fi 99 | ulimit -n $MAX_FD 100 | if [ $? 
-ne 0 ] ; then 101 | warn "Could not set maximum file descriptor limit: $MAX_FD" 102 | fi 103 | else 104 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 105 | fi 106 | fi 107 | 108 | # For Darwin, add options to specify how the application appears in the dock 109 | if $darwin; then 110 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 111 | fi 112 | 113 | # For Cygwin, switch paths to Windows format before running java 114 | if $cygwin ; then 115 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 116 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules 158 | function splitJvmOpts() { 159 | JVM_OPTS=("$@") 160 | } 161 | eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS 162 | JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" 163 | 164 | exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" 165 | -------------------------------------------------------------------------------- /hdfs-mesos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | jar='hdfs-mesos*.jar' 3 | 4 | check_jar() { 5 | jars=$(find . 
-maxdepth 1 -name "$jar" | wc -l) 6 | 7 | if [ $jars -eq 0 ] 8 | then 9 | echo "$jar not found" 10 | exit 1 11 | elif [ $jars -gt 1 ] 12 | then 13 | echo "More than one $jar found" 14 | exit 1 15 | fi 16 | } 17 | 18 | check_jar 19 | java -jar $jar "$@" 20 | 21 | -------------------------------------------------------------------------------- /lib/mesos-0.28.1-patched.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elodina/hdfs-mesos/7edc583e7d393c8ef8fa5f5aa58700b7f55e7701/lib/mesos-0.28.1-patched.jar -------------------------------------------------------------------------------- /lib/util-mesos-0.1.0.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elodina/hdfs-mesos/7edc583e7d393c8ef8fa5f5aa58700b7f55e7701/lib/util-mesos-0.1.0.0.jar -------------------------------------------------------------------------------- /src/java/net/elodina/mesos/hdfs/Cli.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import joptsimple.OptionException; 4 | import joptsimple.OptionParser; 5 | import joptsimple.OptionSet; 6 | import net.elodina.mesos.util.Request; 7 | import org.json.simple.JSONAware; 8 | import org.json.simple.parser.JSONParser; 9 | import org.json.simple.parser.ParseException; 10 | 11 | import java.io.*; 12 | import java.util.*; 13 | 14 | public class Cli { 15 | static String api; 16 | static PrintStream out = System.out; 17 | static PrintStream err = System.err; 18 | 19 | public static void main(String... args) { 20 | try { 21 | handle(new ArrayList<>(Arrays.asList(args))); 22 | } catch (Error e) { 23 | err.println("Error: " + e.getMessage()); 24 | System.exit(1); 25 | } 26 | } 27 | 28 | static void handle(List args) { 29 | if (args.isEmpty()) throw new Error("command required"); 30 | 31 | String cmd = args.remove(0); 32 | if (!cmd.equals("help") && !cmd.equals("scheduler")) 33 | args = handleGenericOptions(args, false); 34 | 35 | switch (cmd) { 36 | case "help": handleHelp(args); break; 37 | case "scheduler": SchedulerCli.handle(args, false); break; 38 | case "node": NodeCli.handle(args, false); break; 39 | default: throw new Error("unsupported command " + cmd); 40 | } 41 | } 42 | 43 | private static void handleHelp(List args) { 44 | String cmd = args.isEmpty() ? 
null : args.remove(0); 45 | if (cmd == null) { 46 | printLine("Usage: ...\n"); 47 | printCmds(); 48 | 49 | printLine(); 50 | printLine("Run `help <cmd>` to see details of specific command"); 51 | return; 52 | } 53 | 54 | switch (cmd) { 55 | case "help": 56 | printLine("Print general or command-specific help\nUsage: help [cmd [cmd]]"); 57 | break; 58 | case "scheduler": SchedulerCli.handle(args, true); break; 59 | case "node": NodeCli.handle(args, true); break; 60 | default: throw new Error("unsupported command " + cmd); 61 | } 62 | } 63 | 64 | private static void printCmds() { 65 | printLine("Commands:"); 66 | printLine("help [cmd [cmd]] - print general or command-specific help", 1); 67 | printLine("scheduler - start scheduler", 1); 68 | printLine("node - node management", 1); 69 | } 70 | 71 | 72 | static List handleGenericOptions(List args, boolean help) { 73 | OptionParser parser = new OptionParser(); 74 | parser.accepts("api", "REST api url (same as --api option for scheduler).") 75 | .withOptionalArg().ofType(String.class); 76 | 77 | parser.allowsUnrecognizedOptions(); 78 | 79 | if (help) { 80 | printLine("Generic Options"); 81 | 82 | try { parser.printHelpOn(out); } 83 | catch (IOException ignore) {} 84 | 85 | return args; 86 | } 87 | 88 | OptionSet options; 89 | try { options = parser.parse(args.toArray(new String[args.size()])); } 90 | catch (OptionException e) { 91 | try { parser.printHelpOn(out); } 92 | catch (IOException ignore) {} 93 | 94 | printLine(); 95 | throw new Error(e.getMessage()); 96 | } 97 | 98 | resolveApi((String) options.valueOf("api")); 99 | 100 | @SuppressWarnings("unchecked") List result = (List) options.nonOptionArguments(); 101 | return new ArrayList<>(result); 102 | } 103 | 104 | private static void resolveApi(String api) { 105 | if (Cli.api != null) return; 106 | 107 | if (api != null && !api.equals("")) { 108 | Cli.api = api; 109 | return; 110 | } 111 | 112 | if (System.getenv("HM_API") != null) { 113 | Cli.api = System.getenv("HM_API"); 114 | return; 115 | } 116 | 117 | File file = new File("hdfs-mesos.properties"); 118 | if (file.exists()) { 119 | Properties props = new Properties(); 120 | try (InputStream stream = new FileInputStream(file)) { props.load(stream); } 121 | catch (IOException e) { throw new IOError(e); } 122 | 123 | Cli.api = props.getProperty("api"); 124 | if (Cli.api != null) return; 125 | } 126 | 127 | throw new Error("Undefined API url. Please provide one of following: CLI --api option, HM_API env var, api var in hdfs-mesos.properties."); 128 | } 129 | 130 | static <T> T sendRequest(String uri, Map params) throws IOException { 131 | String url = api + (api.endsWith("/") ?
"" : "/") + "api" + uri; 132 | Request.Response response = new Request(url) 133 | .params(params) 134 | .method(Request.Method.POST) 135 | .contentType("application/x-www-form-urlencoded; charset=utf-8") 136 | .send(); 137 | 138 | if (response.code() != 200) throw new IOException("Error " + response.code() + ": " + response.message()); 139 | 140 | String text = response.text(); 141 | if (text == null) return null; 142 | 143 | JSONAware json; 144 | try { json = (JSONAware) new JSONParser().parse(text); } 145 | catch (ParseException e) { throw new IOException(e); } 146 | 147 | @SuppressWarnings("unchecked") T result = (T) json; 148 | return result; 149 | } 150 | 151 | static void printLine() { printLine(""); } 152 | 153 | static void printLine(String s) { printLine(s, 0); } 154 | 155 | static void printLine(String s, int indent) { 156 | char[] c = new char[2 * indent]; 157 | Arrays.fill(c, ' '); 158 | out.println(new String(c) + s); 159 | } 160 | 161 | static class Error extends RuntimeException { 162 | Error(String message) { 163 | super(message); 164 | } 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/java/net/elodina/mesos/hdfs/Executor.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import joptsimple.OptionException; 4 | import joptsimple.OptionParser; 5 | import joptsimple.OptionSet; 6 | import net.elodina.mesos.api.Framework; 7 | import net.elodina.mesos.api.Slave; 8 | import net.elodina.mesos.api.Task; 9 | import net.elodina.mesos.api.driver.DriverException; 10 | import net.elodina.mesos.api.driver.ExecutorDriver; 11 | import net.elodina.mesos.api.driver.ExecutorDriverV0; 12 | import net.elodina.mesos.api.driver.ExecutorDriverV1; 13 | import net.elodina.mesos.util.Base64; 14 | import net.elodina.mesos.util.IO; 15 | import net.elodina.mesos.util.Version; 16 | import org.apache.log4j.*; 17 | import org.json.simple.JSONObject; 18 | import org.json.simple.parser.JSONParser; 19 | import org.json.simple.parser.ParseException; 20 | 21 | import java.io.File; 22 | import java.io.IOException; 23 | import java.io.PrintWriter; 24 | import java.io.StringWriter; 25 | import java.util.Arrays; 26 | 27 | public class Executor implements net.elodina.mesos.api.Executor { 28 | public static final Logger logger = Logger.getLogger(Executor.class); 29 | 30 | public static boolean debug; 31 | public static String driverVersion = "v0"; 32 | public static boolean driverV1() { return driverVersion.equals("v1"); } 33 | 34 | public static File hadoopDir; 35 | public static Version hadoopVersion; 36 | 37 | public static File dataDir; 38 | public static File javaHome; 39 | 40 | public static boolean hadoop1x() { return hadoopVersion.compareTo(new Version("2.0")) < 0; } 41 | 42 | public static File hdfs() { return new File(hadoopDir, hadoop1x() ? "bin/hadoop" : "/bin/hdfs"); } 43 | 44 | public static File hadoopConfDir() { return new File(hadoopDir, hadoop1x() ? "conf" : "etc/hadoop"); } 45 | 46 | private ExecutorDriver driver; 47 | private String hostname; 48 | private HdfsProcess process; 49 | 50 | @Override 51 | public void registered(ExecutorDriver driver, Task.Executor executor, Framework framework, Slave slave) { 52 | logger.info("[registered] " + (framework != null ? 
"framework:[" + framework.toString(true) : "]") + " slave:[" + slave.toString(true) + "]"); 53 | this.driver = driver; 54 | hostname = slave.hostname(); 55 | } 56 | 57 | @Override 58 | public void disconnected() { 59 | logger.info("[disconnected]"); 60 | driver = null; 61 | } 62 | 63 | @Override 64 | public void launchTask(final Task task) { 65 | logger.info("[launchTask] " + task.toString(true)); 66 | 67 | new Thread() { 68 | @Override 69 | public void run() { 70 | setName("ProcessRunner"); 71 | 72 | try { runHdfs(task); } 73 | catch (Throwable t) { 74 | logger.error("", t); 75 | 76 | StringWriter buffer = new StringWriter(); 77 | t.printStackTrace(new PrintWriter(buffer, true)); 78 | 79 | try { driver.sendStatus(new Task.Status(task.id(), Task.State.ERROR).message("" + buffer)); } 80 | catch (DriverException de) { logger.error("", de); } 81 | } 82 | 83 | driver.stop(); 84 | 85 | if (driverV1()) { 86 | logger.info("Exiting process"); 87 | System.exit(0); // exiting cause there is no reliable way to interrupt blocked socket read 88 | } 89 | } 90 | }.start(); 91 | } 92 | 93 | private void runHdfs(Task task) throws InterruptedException, IOException { 94 | String data = new String(task.data(), "utf-8"); 95 | if (driverV1()) data = Base64.decode(data); 96 | 97 | JSONObject json; 98 | try { json = (JSONObject) new JSONParser().parse(data); } 99 | catch (ParseException e) { throw new IllegalStateException(e); } 100 | Node node = new Node(json); 101 | 102 | process = new HdfsProcess(node, hostname); 103 | process.start(); 104 | driver.sendStatus(new Task.Status(task.id(), Task.State.STARTING)); 105 | 106 | if (process.waitForOperable()) 107 | driver.sendStatus(new Task.Status(task.id(), Task.State.RUNNING)); 108 | 109 | int code = process.waitFor(); 110 | if (code == 0 || code == 143) driver.sendStatus(new Task.Status(task.id(), Task.State.FINISHED)); 111 | else driver.sendStatus(new Task.Status(task.id(), Task.State.FAILED).message("process exited with " + code)); 112 | } 113 | 114 | @Override 115 | public void killTask(String id) { 116 | logger.info("[killTask] " + id); 117 | if (process != null) process.stop(); 118 | } 119 | 120 | @Override 121 | public void message(byte[] data) { 122 | logger.info("[message] " + new String(data)); 123 | } 124 | 125 | @Override 126 | public void shutdown() { 127 | logger.info("[shutdown]"); 128 | } 129 | 130 | @Override 131 | public void error(String message) { 132 | logger.info("[error] " + message); 133 | } 134 | 135 | public static void main(String[] args) { 136 | parseArgs(args); 137 | initLogging(); 138 | initDirs(); 139 | 140 | Executor executor = new Executor(); 141 | ExecutorDriver driver = driverV1() ? new ExecutorDriverV1(executor) : new ExecutorDriverV0(executor); 142 | 143 | boolean ok = driver.run(); 144 | System.exit(ok ? 
0 : 1); 145 | } 146 | 147 | static void initDirs() { 148 | String hadoopMask = "hadoop-.*"; 149 | hadoopDir = IO.findDir(new File("."), hadoopMask); 150 | if (hadoopDir == null) throw new IllegalStateException(hadoopMask + " not found in current dir"); 151 | 152 | int hyphenIdx = hadoopDir.getName().lastIndexOf("-"); 153 | if (hyphenIdx == -1) throw new IllegalStateException("Can't extract version from " + hadoopDir); 154 | hadoopVersion = new Version(hadoopDir.getName().substring(hyphenIdx + 1)); 155 | 156 | dataDir = new File(new File("."), "data"); 157 | javaHome = findJavaHome(); 158 | 159 | logger.info("Resolved dirs:\nhadoopDir=" + hadoopDir + "\ndataDir=" + dataDir + "\njavaHome=" + javaHome); 160 | } 161 | 162 | static File findJavaHome() { 163 | File jreDir = IO.findDir(new File("."), "jre.*"); 164 | if (jreDir != null) return jreDir; 165 | 166 | if (System.getenv("JAVA_HOME") != null) 167 | return new File(System.getenv("JAVA_HOME")); 168 | 169 | if (System.getenv("PATH") != null) 170 | for (String part : System.getenv("PATH").split(":")) { 171 | part = part.trim(); 172 | if (part.startsWith("\"") && part.endsWith("\"")) 173 | part = part.substring(1, part.length() - 1); 174 | 175 | File java = new File(part, "java"); 176 | if (java.isFile() && java.canRead()) { 177 | File dir = javaHomeDir(java); 178 | if (dir != null) return dir; 179 | } 180 | } 181 | 182 | throw new IllegalStateException("Can't resolve JAVA_HOME / find jre"); 183 | } 184 | 185 | private static File javaHomeDir(File java) { 186 | try { 187 | File tmpFile = File.createTempFile("java_home", null); 188 | 189 | Process process = new ProcessBuilder("readlink", "-f", java.getAbsolutePath()) 190 | .redirectError(ProcessBuilder.Redirect.INHERIT) 191 | .redirectOutput(tmpFile).start(); 192 | 193 | int code = process.waitFor(); 194 | if (code != 0) throw new IOException("Process exited with code " + code); 195 | 196 | File file = new File(IO.readFile(tmpFile).trim()); // $JRE_PATH/bin/java 197 | if (!tmpFile.delete()) throw new IOException("Failed to delete " + tmpFile); 198 | 199 | file = file.getParentFile(); 200 | if (file != null) file = file.getParentFile(); 201 | return file; 202 | } catch (IOException | InterruptedException e) { 203 | logger.warn("", e); 204 | return null; 205 | } 206 | } 207 | 208 | private static void parseArgs(String... args) { 209 | OptionParser parser = new OptionParser(); 210 | parser.accepts("debug", "Enable debug logging. Default - false").withRequiredArg().ofType(Boolean.class); 211 | parser.accepts("driver", "Mesos driver version (v0, v1). 
Default - " + driverVersion).withRequiredArg().ofType(String.class); 212 | 213 | boolean help = args.length > 0 && args[0].equals("help"); 214 | if (help) { 215 | System.out.println("Generic Options"); 216 | 217 | try { parser.printHelpOn(System.out); } 218 | catch (IOException ignore) {} 219 | 220 | System.exit(0); 221 | } 222 | 223 | OptionSet options = null; 224 | try { options = parser.parse(args); } 225 | catch (OptionException e) { 226 | try { parser.printHelpOn(System.out); } 227 | catch (IOException ignore) {} 228 | 229 | System.err.println(e.getMessage()); 230 | System.exit(1); 231 | } 232 | 233 | Boolean debug = (Boolean) options.valueOf("debug"); 234 | 235 | String driver = (String) options.valueOf("driver"); 236 | if (driver != null && !Arrays.asList("v0", "v1").contains(driver)) { 237 | System.err.println("Invalid driver"); 238 | System.exit(1); 239 | } 240 | 241 | if (debug != null) Executor.debug = debug; 242 | if (driver != null) Executor.driverVersion = driver; 243 | } 244 | 245 | static void initLogging() { 246 | BasicConfigurator.resetConfiguration(); 247 | 248 | Logger root = Logger.getRootLogger(); 249 | root.setLevel(Level.INFO); 250 | 251 | Logger.getLogger("net.elodina.mesos.api").setLevel(debug ? Level.DEBUG : Level.INFO); 252 | 253 | PatternLayout layout = new PatternLayout("[executor] %d [%t] %p %c{2} - %m%n"); 254 | root.addAppender(new ConsoleAppender(layout)); 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /src/java/net/elodina/mesos/hdfs/HdfsProcess.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import net.elodina.mesos.util.IO; 4 | import net.elodina.mesos.util.Net; 5 | import net.elodina.mesos.util.Period; 6 | import net.elodina.mesos.util.Strings; 7 | import org.apache.log4j.Logger; 8 | 9 | import java.io.File; 10 | import java.io.IOException; 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | 14 | public class HdfsProcess { 15 | private static Logger logger = Logger.getLogger(HdfsProcess.class); 16 | 17 | private Node node; 18 | private String hostname; 19 | 20 | private Process process; 21 | 22 | public HdfsProcess(Node node, String hostname) { 23 | this.node = node; 24 | this.hostname = hostname; 25 | } 26 | 27 | public void start() throws IOException, InterruptedException { 28 | createCoreSiteXml(); 29 | createHdfsSiteXml(); 30 | configureLogs(); 31 | if (node.type == Node.Type.NAMENODE) formatNameNodeIfRequired(); 32 | 33 | process = startProcess(); 34 | } 35 | 36 | public int waitFor() throws InterruptedException { 37 | if (process == null) throw new IllegalStateException("!started"); 38 | 39 | int code = process.waitFor(); 40 | logger.info("Process finished with code " + code); 41 | 42 | return code; 43 | } 44 | 45 | public boolean waitForOperable() { 46 | if (process == null) throw new IllegalStateException("!started"); 47 | 48 | logger.info("Waiting for process IPC port ready ..."); 49 | while (!isProcessStopped()) { 50 | if (Net.isPortOpen(hostname, node.reservation.ports.get(Node.Port.IPC))) { 51 | logger.info("Process IPC port is ready"); 52 | return true; 53 | } 54 | 55 | Period delay = new Period("1s"); 56 | logger.info("Process IPC port is not ready. 
Sleeping " + delay); 57 | 58 | try { Thread.sleep(delay.ms()); } 59 | catch (InterruptedException e) { break; } 60 | } 61 | 62 | logger.info("Process IPC port is not ready: process stopped"); 63 | return false; 64 | } 65 | 66 | public void stop() { 67 | logger.info("Stopping process"); 68 | process.destroy(); 69 | } 70 | 71 | private boolean isProcessStopped() { 72 | try { process.exitValue(); return true; } 73 | catch (IllegalThreadStateException e) { return false; } 74 | } 75 | 76 | private void createCoreSiteXml() throws IOException { 77 | Map props = new HashMap<>(); 78 | props.put("hadoop.tmp.dir", getTmpDir().getAbsolutePath()); 79 | props.put("fs.default.name", node.runtime.fsUri); 80 | props.putAll(node.coreSiteOpts); 81 | 82 | File file = new File(Executor.hadoopConfDir(), "core-site.xml"); 83 | writePropsXml(file, props); 84 | } 85 | 86 | private void createHdfsSiteXml() throws IOException { 87 | Map props = new HashMap<>(); 88 | 89 | if (node.type == Node.Type.NAMENODE) 90 | props.put("dfs.http.address", hostname + ":" + node.reservation.ports.get(Node.Port.HTTP)); 91 | else { 92 | props.put("dfs.datanode.http.address", hostname + ":" + node.reservation.ports.get(Node.Port.HTTP)); 93 | props.put("dfs.datanode.address", hostname + ":" + node.reservation.ports.get(Node.Port.DATA)); 94 | props.put("dfs.datanode.ipc.address", hostname + ":" + node.reservation.ports.get(Node.Port.IPC)); 95 | } 96 | 97 | props.putAll(node.hdfsSiteOpts); 98 | 99 | File file = new File(Executor.hadoopConfDir(), "hdfs-site.xml"); 100 | writePropsXml(file, props); 101 | } 102 | 103 | private void writePropsXml(File file, Map props) throws IOException { 104 | String content = "\n"; 105 | 106 | for (String name : props.keySet()) { 107 | content += "\n" + 108 | " " + escapeXmlText(name) + "\n" + 109 | " " + escapeXmlText(props.get(name)) + "\n" + 110 | "\n"; 111 | } 112 | 113 | content += ""; 114 | IO.writeFile(file, content); 115 | } 116 | 117 | private void configureLogs() throws IOException { 118 | Map map = new HashMap<>(); 119 | map.put("log4j.appender.console.layout.ConversionPattern=.*", "log4j.appender.console.layout.ConversionPattern=[" + node.type.name().toLowerCase() + "] %d [%t] %p %c{2} - %m%n"); 120 | map.put("log4j.appender.console.target=.*", "log4j.appender.console.target=System.out"); 121 | 122 | File file = new File(Executor.hadoopConfDir(), "log4j.properties"); 123 | IO.replaceInFile(file, map); 124 | } 125 | 126 | private static String escapeXmlText(String s) { return s.replace("<", "<").replace(">", ">"); } 127 | 128 | private File getNameNodeDir() { 129 | String dir = node.hdfsSiteOpts.get("dfs.name.dir"); // 1.x override 130 | if (dir == null) dir = node.hdfsSiteOpts.get("dfs.namenode.name.dir"); // 2.x override 131 | if (dir == null) dir = new File(getTmpDir(), "dfs/name").getPath(); // default 132 | return new File(dir); 133 | } 134 | 135 | private File getTmpDir() { 136 | String dir = node.coreSiteOpts.get("hadoop.tmp.dir"); 137 | return dir != null ? 
new File(dir) : Executor.dataDir; 138 | } 139 | 140 | private void formatNameNodeIfRequired() throws IOException, InterruptedException { 141 | boolean formatted = new File(getNameNodeDir(), "current").isDirectory(); 142 | if (formatted) { 143 | logger.info("Namenode is already formatted"); 144 | return; 145 | } 146 | 147 | logger.info("Formatting namenode"); 148 | 149 | ProcessBuilder builder = new ProcessBuilder(Executor.hdfs().getPath(), "namenode", "-format", "-force") 150 | .redirectOutput(ProcessBuilder.Redirect.INHERIT) 151 | .redirectError(ProcessBuilder.Redirect.INHERIT); 152 | 153 | builder.environment().put("JAVA_HOME", "" + Executor.javaHome); 154 | 155 | int code = builder.start().waitFor(); 156 | if (code != 0) throw new IllegalStateException("Failed to format namenode: process exited with " + code); 157 | } 158 | 159 | private Process startProcess() throws IOException { 160 | String cmd; 161 | switch (node.type) { 162 | case NAMENODE: cmd = "namenode"; break; 163 | case DATANODE: cmd = "datanode"; break; 164 | default: throw new IllegalStateException("unsupported node type " + node.type); 165 | } 166 | 167 | ProcessBuilder builder = new ProcessBuilder(Executor.hdfs().getPath(), cmd) 168 | .redirectOutput(ProcessBuilder.Redirect.INHERIT) 169 | .redirectError(ProcessBuilder.Redirect.INHERIT); 170 | 171 | Map env = builder.environment(); 172 | env.put("JAVA_HOME", "" + Executor.javaHome); 173 | if (node.hadoopJvmOpts != null) env.put("HADOOP_OPTS", node.hadoopJvmOpts); 174 | 175 | logger.info("Starting process '" + Strings.join(builder.command(), " ") + "'"); 176 | return builder.start(); 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/java/net/elodina/mesos/hdfs/HttpServer.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import net.elodina.mesos.util.Constraint; 4 | import net.elodina.mesos.util.IO; 5 | import net.elodina.mesos.util.Period; 6 | import net.elodina.mesos.util.Strings; 7 | import org.apache.log4j.Logger; 8 | import org.eclipse.jetty.server.Request; 9 | import org.eclipse.jetty.server.Response; 10 | import org.eclipse.jetty.server.Server; 11 | import org.eclipse.jetty.server.ServerConnector; 12 | import org.eclipse.jetty.servlet.ServletContextHandler; 13 | import org.eclipse.jetty.servlet.ServletHolder; 14 | import org.eclipse.jetty.util.thread.QueuedThreadPool; 15 | import org.json.simple.JSONArray; 16 | import org.json.simple.JSONObject; 17 | 18 | import javax.servlet.ServletException; 19 | import javax.servlet.http.HttpServlet; 20 | import javax.servlet.http.HttpServletRequest; 21 | import javax.servlet.http.HttpServletResponse; 22 | import java.io.File; 23 | import java.io.FileInputStream; 24 | import java.io.IOException; 25 | import java.io.PrintWriter; 26 | import java.util.*; 27 | 28 | public class HttpServer { 29 | private static final Logger logger = Logger.getLogger(HttpServer.class); 30 | 31 | private Server server; 32 | 33 | public void start() throws Exception { 34 | if (server != null) throw new IllegalStateException("started"); 35 | Scheduler.Config config = Scheduler.$.config; 36 | 37 | QueuedThreadPool threadPool = new QueuedThreadPool(Runtime.getRuntime().availableProcessors() * 16); 38 | threadPool.setName("Jetty"); 39 | 40 | server = new Server(threadPool); 41 | ServerConnector connector = new ServerConnector(server); 42 | connector.setPort(config.apiPort()); 43 | connector.setIdleTimeout(60 * 1000); 44 | 
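// The single servlet registered below serves everything on this port: /health, the /api/node/* REST API, and the artifact downloads (/jar/, /hadoop/, /jre/) fetched by executors; see Servlet.handle().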
45 | ServletContextHandler handler = new ServletContextHandler(); 46 | handler.addServlet(new ServletHolder(new Servlet()), "/"); 47 | handler.setErrorHandler(new ErrorHandler()); 48 | 49 | server.setHandler(handler); 50 | server.addConnector(connector); 51 | server.start(); 52 | 53 | logger.info("started on port " + connector.getLocalPort()); 54 | } 55 | 56 | public void stop() throws Exception { 57 | if (server == null) throw new IllegalStateException("!started"); 58 | 59 | server.stop(); 60 | server.join(); 61 | server = null; 62 | 63 | logger.info("stopped"); 64 | } 65 | 66 | private class Servlet extends HttpServlet { 67 | protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request, response); } 68 | 69 | protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { 70 | String url = request.getRequestURL() + (request.getQueryString() != null ? "?" + request.getQueryString() : ""); 71 | logger.info("handling - " + url); 72 | 73 | try { 74 | handle(request, response); 75 | logger.info("finished handling"); 76 | } catch (HttpError e) { 77 | response.sendError(e.getCode(), e.getMessage()); 78 | } catch (Exception e) { 79 | logger.error("error handling", e); 80 | response.sendError(500, "" + e); 81 | } 82 | } 83 | 84 | private void handle(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { 85 | String uri = request.getRequestURI(); 86 | Scheduler.Config config = Scheduler.$.config; 87 | 88 | if (uri.equals("/health")) handleHealth(response); 89 | else if (uri.startsWith("/api/node")) handleNodeApi(request, response); 90 | else if (uri.startsWith("/jar/")) downloadFile(config.jar, response); 91 | else if (uri.startsWith("/hadoop/")) downloadFile(config.hadoop, response); 92 | else if (config.jre != null && uri.startsWith("/jre/")) downloadFile(config.jre, response); 93 | else throw new HttpError(404, "not found"); 94 | } 95 | 96 | private void handleHealth(HttpServletResponse response) throws IOException { 97 | response.setContentType("text/plain; charset=utf-8"); 98 | response.getWriter().println("ok"); 99 | } 100 | 101 | private void handleNodeApi(HttpServletRequest request, HttpServletResponse response) throws IOException { 102 | String uri = request.getRequestURI(); 103 | uri = uri.substring("/api/node".length()); 104 | 105 | request.setAttribute("jsonResponse", true); 106 | response.setContentType("application/json; charset=utf-8"); 107 | 108 | switch (uri) { 109 | case "/list": handleNodeList(request, response); break; 110 | case "/add": case "/update": handleNodeAddUpdate(request, response, uri.equals("/add")); break; 111 | case "/start": case "/stop": handleNodeStartStop(request, response, uri.equals("/start")); break; 112 | case "/remove": handleNodeRemove(request, response); break; 113 | default: throw new HttpError(404, "unsupported method " + uri); 114 | } 115 | } 116 | 117 | private void handleNodeList(HttpServletRequest request, HttpServletResponse response) throws IOException { 118 | String expr = "*"; 119 | if (request.getParameter("node") != null) expr = request.getParameter("node"); 120 | 121 | List ids; 122 | try { ids = Nodes.expandExpr(expr); } 123 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid node"); } 124 | 125 | List nodes = Nodes.getNodes(ids); 126 | 127 | @SuppressWarnings("unchecked") List nodesJson = new JSONArray(); 128 | for (Node node : nodes) 
nodesJson.add(node.toJson()); 129 | 130 | response.getWriter().println("" + nodesJson); 131 | } 132 | 133 | @SuppressWarnings("ConstantConditions") 134 | private void handleNodeAddUpdate(HttpServletRequest request, HttpServletResponse response, boolean add) throws IOException { 135 | String expr = request.getParameter("node"); 136 | if (expr == null || expr.isEmpty()) throw new HttpError(400, "node required"); 137 | 138 | List ids; 139 | try { ids = Nodes.expandExpr(expr); } 140 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid node"); } 141 | 142 | for (String id : ids) { 143 | Node node = Nodes.getNode(id); 144 | if (add && node != null) throw new HttpError(400, "duplicate node"); 145 | if (!add && node == null) throw new HttpError(400, "node not found"); 146 | if (!add && node.state != Node.State.IDLE) throw new HttpError(400, "node not idle"); 147 | } 148 | 149 | Node.Type type = null; 150 | if (add) { 151 | if (request.getParameter("type") == null) throw new HttpError(400, "type required"); 152 | 153 | try { type = Node.Type.valueOf(request.getParameter("type").toUpperCase()); } 154 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid type"); } 155 | 156 | if (type == Node.Type.NAMENODE && !Nodes.getNodes(Node.Type.NAMENODE).isEmpty()) 157 | throw new HttpError(400, "duplicate namenode"); 158 | } 159 | 160 | Double cpus = null; 161 | if (request.getParameter("cpus") != null) 162 | try { cpus = Double.valueOf(request.getParameter("cpus")); } 163 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid cpus"); } 164 | 165 | Long mem = null; 166 | if (request.getParameter("mem") != null) 167 | try { mem = Long.valueOf(request.getParameter("mem")); } 168 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid mem"); } 169 | 170 | Map constraints = null; 171 | if (request.getParameter("constraints") != null) { 172 | constraints = new LinkedHashMap<>(); 173 | Map m = Strings.parseMap(request.getParameter("constraints")); 174 | for (String name : m.keySet()) 175 | try { constraints.put(name, new Constraint(m.get(name))); } 176 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid constraint: " + e.getMessage()); } 177 | } 178 | 179 | String executorJvmOpts = request.getParameter("executorJvmOpts"); 180 | String hadoopJvmOpts = request.getParameter("hadoopJvmOpts"); 181 | 182 | String coreSiteOpts = request.getParameter("coreSiteOpts"); 183 | try { Strings.parseMap(coreSiteOpts, false); } 184 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid coreSiteOpts"); } 185 | 186 | String hdfsSiteOpts = request.getParameter("hdfsSiteOpts"); 187 | try { Strings.parseMap(hdfsSiteOpts, false); } 188 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid hdfsSiteOpts"); } 189 | 190 | String externalFsUri = request.getParameter("externalFsUri"); 191 | 192 | Period failoverDelay = null; 193 | if (request.getParameter("failoverDelay") != null) 194 | try { failoverDelay = new Period(request.getParameter("failoverDelay")); } 195 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid failoverDelay"); } 196 | 197 | Period failoverMaxDelay = null; 198 | if (request.getParameter("failoverMaxDelay") != null) 199 | try { failoverMaxDelay = new Period(request.getParameter("failoverMaxDelay")); } 200 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid failoverMaxDelay"); } 201 | 202 | String failoverMaxTries = 
request.getParameter("failoverMaxTries"); 203 | if (failoverMaxTries != null && !failoverMaxTries.equals("") && !Strings.isInteger(failoverMaxTries)) 204 | throw new HttpError(400, "invalid failoverMaxTries"); 205 | 206 | 207 | List nodes = new ArrayList<>(); 208 | for (String id : ids) { 209 | Node node; 210 | if (add) node = Nodes.addNode(new Node(id, type)); 211 | else node = Nodes.getNode(id); 212 | 213 | nodes.add(node); 214 | 215 | if (cpus != null) node.cpus = cpus; 216 | if (mem != null) node.mem = mem; 217 | 218 | if (constraints != null) node.constraints = constraints; 219 | 220 | if (executorJvmOpts != null) node.executorJvmOpts = executorJvmOpts.equals("") ? null : executorJvmOpts; 221 | if (hadoopJvmOpts != null) node.hadoopJvmOpts = hadoopJvmOpts.equals("") ? null : hadoopJvmOpts; 222 | 223 | if (coreSiteOpts != null) node.coreSiteOpts = Strings.parseMap(coreSiteOpts); 224 | if (hdfsSiteOpts != null) node.hdfsSiteOpts = Strings.parseMap(hdfsSiteOpts); 225 | 226 | if (externalFsUri != null && node.type == Node.Type.NAMENODE) 227 | node.externalFsUri = externalFsUri.equals("") ? null : externalFsUri; 228 | 229 | if (failoverDelay != null) node.failover.delay = failoverDelay; 230 | if (failoverMaxDelay != null) node.failover.maxDelay = failoverMaxDelay; 231 | if (failoverMaxTries != null) node.failover.maxTries = !failoverMaxTries.equals("") ? Integer.valueOf(failoverMaxTries) : null; 232 | } 233 | Nodes.save(); 234 | 235 | @SuppressWarnings("unchecked") List nodesJson = new JSONArray(); 236 | for (Node node : nodes) nodesJson.add(node.toJson()); 237 | response.getWriter().println("" + nodesJson); 238 | } 239 | 240 | @SuppressWarnings("unchecked") 241 | private void handleNodeStartStop(HttpServletRequest request, HttpServletResponse response, boolean start) throws IOException { 242 | String expr = request.getParameter("node"); 243 | if (expr == null || expr.isEmpty()) throw new HttpError(400, "node required"); 244 | 245 | List ids; 246 | try { ids = Nodes.expandExpr(expr); } 247 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid node"); } 248 | 249 | for (String id : ids) { 250 | Node node = Nodes.getNode(id); 251 | if (node == null) throw new HttpError(400, "node not found"); 252 | if (start && node.state != Node.State.IDLE) throw new HttpError(400, "node not idle"); 253 | if (!start && node.state == Node.State.IDLE) throw new HttpError(400, "node idle"); 254 | if (node.isExternal()) throw new HttpError(400, "node external"); 255 | } 256 | 257 | Period timeout = new Period("2m"); 258 | if (request.getParameter("timeout") != null) 259 | try { timeout = new Period(request.getParameter("timeout")); } 260 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid timeout"); } 261 | 262 | 263 | boolean completed = true; 264 | boolean wait = timeout.ms() > 0; 265 | List nodes = Nodes.getNodes(ids); 266 | 267 | for (Node node : nodes) { 268 | node.failover.resetFailures(); 269 | node.state = start ? Node.State.STARTING : Node.State.STOPPING; 270 | if (!start && node.runtime != null) node.runtime.killSent = false; 271 | 272 | Nodes.save(); 273 | if (!wait) continue; 274 | 275 | try { completed = node.waitFor(start ? Node.State.RUNNING : Node.State.IDLE, timeout); } 276 | catch (InterruptedException e) { throw new IllegalStateException(e); } 277 | if (!completed) break; 278 | } 279 | 280 | String status = wait ? (completed ? (start ? 
"started": "stopped"): "timeout") : "scheduled"; 281 | @SuppressWarnings("unchecked") List nodesJson = (List)new JSONArray(); 282 | for (Node node : nodes) nodesJson.add(node.toJson()); 283 | 284 | JSONObject json = new JSONObject(); 285 | json.put("status", status); 286 | json.put("nodes", nodesJson); 287 | response.getWriter().write("" + json); 288 | } 289 | 290 | private void handleNodeRemove(HttpServletRequest request, HttpServletResponse response) throws IOException { 291 | String expr = request.getParameter("node"); 292 | if (expr == null || expr.isEmpty()) throw new HttpError(400, "node required"); 293 | 294 | List ids; 295 | try { ids = Nodes.expandExpr(expr); } 296 | catch (IllegalArgumentException e) { throw new HttpError(400, "invalid node"); } 297 | 298 | if (ids.isEmpty()) throw new HttpError(400, "node not found"); 299 | 300 | for (String id : ids) { 301 | Node node = Nodes.getNode(id); 302 | if (node == null) throw new HttpError(400, "node not found"); 303 | if (node.state != Node.State.IDLE) throw new HttpError(400, "node not idle"); 304 | } 305 | 306 | for (Node node : Nodes.getNodes(ids)) 307 | Nodes.removeNode(node); 308 | Nodes.save(); 309 | 310 | @SuppressWarnings("unchecked") List json = new JSONArray(); 311 | json.addAll(ids); 312 | response.getWriter().write("" + json); 313 | } 314 | 315 | private void downloadFile(File file, HttpServletResponse response) throws IOException { 316 | response.setContentType("application/zip"); 317 | response.setHeader("Content-Length", "" + file.length()); 318 | response.setHeader("Content-Disposition", "attachment; filename=\"" + file.getName() + "\""); 319 | IO.copyAndClose(new FileInputStream(file), response.getOutputStream()); 320 | } 321 | } 322 | 323 | private class ErrorHandler extends org.eclipse.jetty.server.handler.ErrorHandler { 324 | public void handle(String target, Request baseRequest, HttpServletRequest request, HttpServletResponse response_) throws IOException { 325 | Response response = (Response) response_; 326 | int code = response.getStatus(); 327 | 328 | String error = response.getReason() != null ? 
response.getReason() : "";
329 | PrintWriter writer = response.getWriter();
330 | 
331 | if (request.getAttribute("jsonResponse") != null) {
332 | response.setContentType("application/json; charset=utf-8");
333 | 
334 | Map<String, Object> map = new HashMap<>();
335 | map.put("code", code);
336 | map.put("error", error);
337 | 
338 | writer.println("" + new JSONObject(map));
339 | } else {
340 | response.setContentType("text/plain; charset=utf-8");
341 | writer.println(code + " - " + error);
342 | }
343 | 
344 | writer.flush();
345 | baseRequest.setHandled(true);
346 | }
347 | }
348 | 
349 | class HttpError extends RuntimeException {
350 | private int code;
351 | 
352 | public HttpError(int code, String message) {
353 | super(message);
354 | this.code = code;
355 | }
356 | 
357 | public int getCode() { return code; }
358 | }
359 | }
360 | 
--------------------------------------------------------------------------------
/src/java/net/elodina/mesos/hdfs/Node.java:
--------------------------------------------------------------------------------
1 | package net.elodina.mesos.hdfs;
2 | 
3 | import net.elodina.mesos.api.*;
4 | import net.elodina.mesos.util.*;
5 | import org.json.simple.JSONArray;
6 | import org.json.simple.JSONObject;
7 | 
8 | import java.text.ParseException;
9 | import java.text.SimpleDateFormat;
10 | import java.util.*;
11 | 
12 | public class Node {
13 | public String id;
14 | public Type type = Type.NAMENODE;
15 | public State state = State.IDLE;
16 | 
17 | public double cpus = 0.5;
18 | public long mem = 512;
19 | 
20 | public Map<String, Constraint> constraints = new LinkedHashMap<>();
21 | 
22 | public String executorJvmOpts;
23 | public String hadoopJvmOpts;
24 | public Map<String, String> coreSiteOpts = new HashMap<>();
25 | public Map<String, String> hdfsSiteOpts = new HashMap<>();
26 | 
27 | public String externalFsUri;
28 | 
29 | public Stickiness stickiness = new Stickiness();
30 | public Failover failover = new Failover();
31 | public Runtime runtime;
32 | public Reservation reservation;
33 | 
34 | public Node() {}
35 | public Node(String id) { this.id = id; }
36 | public Node(String id, Node.Type type) { this.id = id; this.type = type; }
37 | public Node(JSONObject json) { fromJson(json); }
38 | 
39 | public boolean isExternal() { return externalFsUri != null; }
40 | 
41 | public String matches(Offer offer) { return matches(offer, Collections.<String, Collection<String>>emptyMap(), new Date()); }
42 | 
43 | public String matches(Offer offer, Map<String, Collection<String>> otherAttributes) { return matches(offer, otherAttributes, new Date()); }
44 | 
45 | public String matches(Offer offer, Date now) { return matches(offer, Collections.<String, Collection<String>>emptyMap(), now); }
46 | 
47 | public String matches(Offer offer, Map<String, Collection<String>> otherAttributes, Date now) {
48 | Reservation reservation = reserve(offer);
49 | 
50 | // resources
51 | if (reservation.cpus < cpus) return "cpus < " + cpus;
52 | if (reservation.mem < mem) return "mem < " + mem;
53 | 
54 | // namenode running
55 | if (type == Type.DATANODE) {
56 | List<Node> nns = Nodes.getNodes(Node.Type.NAMENODE);
57 | Node nn = nns.isEmpty() ?
null : nns.get(0); 58 | 59 | if (nn == null) return "no namenode"; 60 | if (!nn.isExternal() && nn.state != State.RUNNING) return "no running or external namenode"; 61 | } 62 | 63 | // constraints 64 | Map offerAttributes = new HashMap<>(); 65 | offerAttributes.put("hostname", offer.hostname()); 66 | 67 | for (Attribute attribute : offer.attributes()) 68 | offerAttributes.put(attribute.name(), "" + attribute.value()); 69 | 70 | for (String name : constraints.keySet()) { 71 | Constraint constraint = constraints.get(name); 72 | if (!offerAttributes.containsKey(name)) return "no " + name + " attribute"; 73 | if (!constraint.matches(offerAttributes.get(name), otherAttributes.get(name))) return name + " doesn't match " + constraint; 74 | } 75 | 76 | // stickiness 77 | if (!stickiness.allowsHostname(offer.hostname(), now)) 78 | return "hostname != stickiness hostname"; 79 | 80 | return null; 81 | } 82 | 83 | public Reservation reserve(Offer offer) { 84 | Map resources = new HashMap<>(); 85 | for (Resource resource : offer.resources()) resources.put(resource.name(), resource); 86 | 87 | // cpu 88 | double reservedCpus = 0; 89 | Resource cpusResource = resources.get("cpus"); 90 | if (cpusResource != null) reservedCpus = Math.min(cpusResource.value().asDouble(), cpus); 91 | 92 | // mem 93 | long reservedMem = 0; 94 | Resource memResource = resources.get("mem"); 95 | if (memResource != null) reservedMem = Math.min((long) memResource.value().asLong(), mem); 96 | 97 | // ports 98 | Map reservedPorts = reservePorts(offer); 99 | 100 | return new Reservation(reservedCpus, reservedMem, reservedPorts); 101 | } 102 | 103 | private Map reservePorts(Offer offer) { 104 | Map ports = new HashMap<>(); 105 | 106 | // find resource 107 | Resource portsResource = null; 108 | for (Resource resource : offer.resources()) 109 | if (resource.name().equals("ports")) { portsResource = resource; break; } 110 | if (portsResource == null) return ports; 111 | 112 | // collect & sort ranges 113 | List availPorts = new ArrayList<>(portsResource.value().asRanges()); 114 | Collections.sort(availPorts, new Comparator() { 115 | public int compare(Range x, Range y) { return x.start() - y.start(); } 116 | }); 117 | 118 | // reserve ports 119 | for (String name : Node.Port.names(type)) { 120 | int port = reservePort(null, availPorts); 121 | if (port != -1) ports.put(name, port); 122 | } 123 | 124 | return ports; 125 | } 126 | 127 | int reservePort(Range range, List availPorts) { 128 | Range r = null; 129 | if (range == null) 130 | r = !availPorts.isEmpty() ? availPorts.get(0) : null; // take first avail range 131 | else 132 | for (Range t : availPorts) // take first range overlapping with ports 133 | if (range.overlap(t) != null) { r = t; break; } 134 | 135 | if (r == null) return -1; 136 | int port = range != null ? 
r.overlap(range).start() : r.start(); 137 | 138 | // remove allocated port 139 | int idx = availPorts.indexOf(r); 140 | availPorts.remove(r); 141 | availPorts.addAll(idx, r.split(port)); 142 | 143 | return port; 144 | } 145 | 146 | public boolean waitFor(State state, Period timeout) throws InterruptedException { 147 | long t = timeout.ms(); 148 | 149 | while (t > 0 && this.state != state) { 150 | long delay = Math.min(100, t); 151 | Thread.sleep(delay); 152 | t -= delay; 153 | } 154 | 155 | return this.state == state; 156 | } 157 | 158 | public void initRuntime(Offer offer) { 159 | reservation = reserve(offer); 160 | 161 | runtime = new Runtime(); 162 | runtime.slaveId = offer.slaveId(); 163 | runtime.hostname = offer.hostname(); 164 | 165 | for (Attribute attribute : offer.attributes()) 166 | runtime.attributes.put(attribute.name(), "" + attribute.value()); 167 | 168 | runtime.fsUri = getFsUri(); 169 | } 170 | 171 | private String getFsUri() { 172 | List nodes = Nodes.getNodes(Type.NAMENODE); 173 | Node node = !nodes.isEmpty() ? nodes.get(0) : null; 174 | if (node == null) throw new IllegalStateException("no namenode"); 175 | 176 | if (node.isExternal()) return node.externalFsUri; 177 | if (node.runtime == null) throw new IllegalStateException("namenode not started"); 178 | 179 | String host = node.runtime.hostname; 180 | Integer port = node.reservation.ports.get(Port.IPC); 181 | if (port == null) throw new IllegalStateException("no ipc port"); 182 | 183 | return "hdfs://" + host + ":" + port; 184 | } 185 | 186 | public void registerStart(String hostname) { 187 | stickiness.registerStart(hostname); 188 | failover.resetFailures(); 189 | } 190 | 191 | public void registerStop() { registerStop(new Date(), false); } 192 | 193 | public void registerStop(Date now, boolean failed) { 194 | if (!failed || failover.failures == 0) stickiness.registerStop(now); 195 | 196 | if (failed) failover.registerFailure(now); 197 | else failover.resetFailures(); 198 | } 199 | 200 | public Task newTask() { 201 | if (runtime == null) throw new IllegalStateException("runtime == null"); 202 | if (reservation == null) throw new IllegalStateException("reservation == null"); 203 | 204 | String data = "" + toJson(); 205 | if (Scheduler.$.config.driverV1()) data = Base64.encode(data); 206 | 207 | return new Task() 208 | .id(runtime.taskId) 209 | .name("hdfs-" + id) 210 | .slaveId(runtime.slaveId) 211 | .executor(newExecutor()) 212 | .data(data.getBytes()) 213 | .resources(reservation.toResources()); 214 | } 215 | 216 | Task.Executor newExecutor() { 217 | if (runtime == null) throw new IllegalStateException("runtime == null"); 218 | 219 | Scheduler.Config config = Scheduler.$.config; 220 | String cmd = "java -cp " + config.jar.getName(); 221 | if (executorJvmOpts != null) cmd += " " + executorJvmOpts; 222 | 223 | cmd += " net.elodina.mesos.hdfs.Executor"; 224 | cmd += " --driver=" + config.driver; 225 | cmd += " --debug=" + config.debug; 226 | 227 | Command command = new Command() 228 | .addUri(new Command.URI(config.api + "/jar/" + config.jar.getName(), false).cache(false)) 229 | .addUri(new Command.URI(config.api + "/hadoop/" + config.hadoop.getName())); 230 | 231 | if (config.jre != null) { 232 | command.addUri(new Command.URI(config.api + "/jre/" + config.jre.getName())); 233 | cmd = "jre/bin/" + cmd; 234 | } 235 | 236 | command.value(cmd); 237 | 238 | return new Task.Executor() 239 | .id(runtime.executorId) 240 | .name("hdfs-" + id) 241 | .command(command); 242 | } 243 | 244 | @SuppressWarnings("unchecked") 245 | 
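// Sample of the JSON this method emits for a freshly added namenode
// (values illustrative): {"id":"nn", "type":"namenode", "state":"idle",
//   "cpus":0.5, "mem":512, "stickiness":{...}, "failover":{...}}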
public JSONObject toJson() { 246 | JSONObject json = new JSONObject(); 247 | 248 | json.put("id", id); 249 | json.put("type", type.name().toLowerCase()); 250 | json.put("state", "" + state.name().toLowerCase()); 251 | 252 | json.put("cpus", cpus); 253 | json.put("mem", mem); 254 | 255 | if (!constraints.isEmpty()) json.put("constraints", Strings.formatMap(constraints)); 256 | 257 | if (executorJvmOpts != null) json.put("executorJvmOpts", executorJvmOpts); 258 | if (hadoopJvmOpts != null) json.put("hadoopJvmOpts", hadoopJvmOpts); 259 | 260 | if (!coreSiteOpts.isEmpty()) json.put("coreSiteOpts", new JSONObject(coreSiteOpts)); 261 | if (!hdfsSiteOpts.isEmpty()) json.put("hdfsSiteOpts", new JSONObject(hdfsSiteOpts)); 262 | 263 | if (externalFsUri != null) json.put("externalFsUri", externalFsUri); 264 | 265 | json.put("stickiness", stickiness.toJson()); 266 | json.put("failover", failover.toJson()); 267 | if (runtime != null) json.put("runtime", runtime.toJson()); 268 | if (reservation != null) json.put("reservation", reservation.toJson()); 269 | 270 | return json; 271 | } 272 | 273 | public void fromJson(JSONObject json) { 274 | id = (String) json.get("id"); 275 | type = Type.valueOf(json.get("type").toString().toUpperCase()); 276 | state = State.valueOf(json.get("state").toString().toUpperCase()); 277 | 278 | cpus = ((Number) json.get("cpus")).doubleValue(); 279 | mem = ((Number) json.get("mem")).longValue(); 280 | 281 | constraints.clear(); 282 | if (json.containsKey("constraints")) { 283 | Map m = Strings.parseMap((String) json.get("constraints")); 284 | for (String name : m.keySet()) constraints.put(name, new Constraint(m.get(name))); 285 | } 286 | 287 | if (json.containsKey("executorJvmOpts")) executorJvmOpts = (String) json.get("executorJvmOpts"); 288 | if (json.containsKey("hadoopJvmOpts")) hadoopJvmOpts = (String) json.get("hadoopJvmOpts"); 289 | 290 | coreSiteOpts.clear(); 291 | if (json.containsKey("coreSiteOpts")) { 292 | JSONObject coreSiteOptsJson = (JSONObject) json.get("coreSiteOpts"); 293 | for (Object name : coreSiteOptsJson.keySet()) coreSiteOpts.put("" + name, "" + coreSiteOptsJson.get("" + name)); 294 | } 295 | 296 | hdfsSiteOpts.clear(); 297 | if (json.containsKey("hdfsSiteOpts")) { 298 | JSONObject hdfsSiteOptsJson = (JSONObject) json.get("hdfsSiteOpts"); 299 | for (Object name : hdfsSiteOptsJson.keySet()) hdfsSiteOpts.put("" + name, "" + hdfsSiteOptsJson.get("" + name)); 300 | } 301 | 302 | if (json.containsKey("externalFsUri")) externalFsUri = (String) json.get("externalFsUri"); 303 | 304 | stickiness = new Stickiness((JSONObject) json.get("stickiness")); 305 | failover = new Failover((JSONObject) json.get("failover")); 306 | if (json.containsKey("runtime")) runtime = new Runtime((JSONObject) json.get("runtime")); 307 | if (json.containsKey("reservation")) reservation = new Reservation((JSONObject) json.get("reservation")); 308 | } 309 | 310 | @SuppressWarnings({"unchecked", "RedundantCast"}) 311 | public static List fromJsonArray(JSONArray nodesJson) { 312 | List nodes = new ArrayList<>(); 313 | for (JSONObject nodeJson : (List) nodesJson) nodes.add(new Node(nodeJson)); 314 | return nodes; 315 | } 316 | 317 | private static SimpleDateFormat dateTimeFormat() { 318 | SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); 319 | format.setTimeZone(TimeZone.getTimeZone("UTC-0")); 320 | return format; 321 | } 322 | 323 | public int hashCode() { return id.hashCode(); } 324 | 325 | public boolean equals(Object obj) { return obj instanceof Node && ((Node) 
obj).id.equals(id); } 326 | 327 | public String toString() { return id; } 328 | 329 | public enum State { 330 | IDLE, 331 | STARTING, 332 | RUNNING, 333 | STOPPING, 334 | RECONCILING 335 | } 336 | 337 | public enum Type { 338 | NAMENODE, 339 | DATANODE 340 | } 341 | 342 | public static class Port { 343 | public static final String HTTP = "http"; 344 | public static final String IPC = "ipc"; 345 | public static final String DATA = "data"; 346 | 347 | public static String[] names(Type type) { 348 | return type == Type.NAMENODE ? 349 | new String[]{HTTP, IPC} : 350 | new String[]{HTTP, IPC, DATA}; 351 | } 352 | } 353 | 354 | public static class Runtime { 355 | public String taskId = "" + UUID.randomUUID(); 356 | public String executorId = "" + UUID.randomUUID(); 357 | 358 | public String slaveId; 359 | public String hostname; 360 | public Map attributes = new LinkedHashMap<>(); 361 | 362 | public String fsUri; 363 | public boolean killSent; 364 | 365 | public Runtime() {} 366 | public Runtime(JSONObject json) { fromJson(json); } 367 | 368 | @SuppressWarnings("unchecked") 369 | public JSONObject toJson() { 370 | JSONObject json = new JSONObject(); 371 | 372 | json.put("taskId", taskId); 373 | json.put("executorId", executorId); 374 | 375 | json.put("slaveId", slaveId); 376 | json.put("hostname", hostname); 377 | if (!attributes.isEmpty()) json.put("attributes", Strings.formatMap(attributes)); 378 | 379 | json.put("fsUri", fsUri); 380 | json.put("killSent", killSent); 381 | 382 | return json; 383 | } 384 | 385 | public void fromJson(JSONObject json) { 386 | taskId = (String) json.get("taskId"); 387 | executorId = (String) json.get("executorId"); 388 | 389 | slaveId = (String) json.get("slaveId"); 390 | hostname = (String) json.get("hostname"); 391 | attributes.clear(); 392 | if (json.containsKey("attributes")) attributes.putAll(Strings.parseMap((String) json.get("attributes"))); 393 | 394 | fsUri = (String) json.get("fsUri"); 395 | killSent = (boolean) json.get("killSent"); 396 | } 397 | } 398 | 399 | public static class Reservation { 400 | double cpus = 0; 401 | long mem = 0; 402 | Map ports = new HashMap<>(); 403 | 404 | public Reservation() {} 405 | 406 | public Reservation(double cpus, long mem, Map ports) { 407 | this.cpus = cpus; 408 | this.mem = mem; 409 | this.ports = ports; 410 | } 411 | 412 | public Reservation(JSONObject json) { fromJson(json); } 413 | 414 | public List toResources() { 415 | class R { 416 | Resource cpus(double value) { 417 | return new Resource("cpus", new Value(Value.Type.SCALAR, value)); 418 | } 419 | 420 | Resource mem(long value) { 421 | return new Resource("mem", new Value(Value.Type.SCALAR, (double)value)); 422 | } 423 | 424 | Resource port(long value) { 425 | return new Resource("ports", new Value(Value.Type.RANGES, Arrays.asList(new Range((int)value)))); 426 | } 427 | } 428 | 429 | R r = new R(); 430 | 431 | List resources = new ArrayList<>(); 432 | 433 | if (cpus > 0) resources.add(r.cpus(cpus)); 434 | if (mem > 0) resources.add(r.mem(mem)); 435 | 436 | for (String name : ports.keySet()) 437 | resources.add(r.port(ports.get(name))); 438 | 439 | return resources; 440 | } 441 | 442 | public void fromJson(JSONObject json) { 443 | cpus = (double) json.get("cpus"); 444 | mem = (long) json.get("mem"); 445 | 446 | ports.clear(); 447 | if (json.containsKey("ports")) { 448 | JSONObject portsJson = (JSONObject) json.get("ports"); 449 | for (Object name : portsJson.keySet()) 450 | ports.put("" + name, ((Number) portsJson.get(name)).intValue()); 451 | } 452 | } 453 | 
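// fromJson() above and toJson() below round-trip this shape
// (port numbers illustrative):
//   {"cpus":0.5, "mem":512, "ports":{"http":31000, "ipc":31001}}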
454 | @SuppressWarnings("unchecked") 455 | public JSONObject toJson() { 456 | JSONObject json = new JSONObject(); 457 | 458 | json.put("cpus", cpus); 459 | json.put("mem", mem); 460 | 461 | if (!ports.isEmpty()) json.put("ports", new JSONObject(ports)); 462 | 463 | return json; 464 | } 465 | } 466 | 467 | public static class Stickiness { 468 | public Period period = new Period("30m"); 469 | public volatile String hostname; 470 | public volatile Date stopTime; 471 | 472 | public Stickiness() {} 473 | public Stickiness(JSONObject json) { fromJson(json); } 474 | 475 | public Date expires() { return stopTime != null ? new Date(stopTime.getTime() + period.ms()) : null; } 476 | 477 | public void registerStart(String hostname) { 478 | this.hostname = hostname; 479 | stopTime = null; 480 | } 481 | 482 | public void registerStop() { registerStop(new Date()); } 483 | public void registerStop(Date now) { 484 | this.stopTime = now; 485 | } 486 | 487 | public boolean allowsHostname(String hostname) { return allowsHostname(hostname, new Date()); } 488 | 489 | @SuppressWarnings("SimplifiableIfStatement") 490 | public boolean allowsHostname(String hostname, Date now) { 491 | if (this.hostname == null) return true; 492 | if (stopTime == null || now.getTime() - stopTime.getTime() >= period.ms()) return true; 493 | return this.hostname.equals(hostname); 494 | } 495 | 496 | public void fromJson(JSONObject json) { 497 | period = new Period((String) json.get("period")); 498 | 499 | try { if (json.containsKey("stopTime")) stopTime = dateTimeFormat().parse((String) json.get("stopTime")); } 500 | catch (ParseException e) { throw new IllegalStateException(e); } 501 | 502 | if (json.containsKey("hostname")) hostname = (String) json.get("hostname"); 503 | } 504 | 505 | @SuppressWarnings("unchecked") 506 | public JSONObject toJson() { 507 | JSONObject json = new JSONObject(); 508 | 509 | json.put("period", "" + period); 510 | if (stopTime != null) json.put("stopTime", dateTimeFormat().format(stopTime)); 511 | if (hostname != null) json.put("hostname", hostname); 512 | 513 | return json; 514 | } 515 | } 516 | 517 | public static class Failover { 518 | public Period delay = new Period("3m"); 519 | public Period maxDelay = new Period("30m"); 520 | public Integer maxTries; 521 | 522 | public volatile int failures; 523 | public volatile Date failureTime; 524 | 525 | public Failover() {} 526 | public Failover(Period delay, Period maxDelay) { this.delay = delay; this.maxDelay = maxDelay; } 527 | public Failover(JSONObject json) { fromJson(json); } 528 | 529 | public Period currentDelay() { 530 | if (failures == 0) return new Period("0"); 531 | 532 | int multiplier = 1 << Math.min(30, failures - 1); 533 | long d = delay.ms() * multiplier; 534 | 535 | return d > maxDelay.ms() ? 
maxDelay : new Period(delay.value() * multiplier + delay.unit()); 536 | } 537 | 538 | public Date delayExpires() { 539 | if (failures == 0) return new Date(0); 540 | return new Date(failureTime.getTime() + currentDelay().ms()); 541 | } 542 | 543 | public boolean isWaitingDelay(Date now) { return delayExpires().getTime() > now.getTime(); } 544 | 545 | @SuppressWarnings("SimplifiableIfStatement") 546 | public boolean isMaxTriesExceeded() { 547 | if (maxTries == null) return false; 548 | return failures >= maxTries; 549 | } 550 | 551 | public void registerFailure(Date now) { 552 | failures += 1; 553 | failureTime = now; 554 | } 555 | 556 | public void resetFailures() { 557 | failures = 0; 558 | failureTime = null; 559 | } 560 | 561 | public void fromJson(JSONObject json) { 562 | delay = new Period((String) json.get("delay")); 563 | maxDelay = new Period((String) json.get("maxDelay")); 564 | if (json.containsKey("maxTries")) maxTries = ((Number) json.get("maxTries")).intValue(); 565 | 566 | if (json.containsKey("failures")) failures = ((Number) json.get("failures")).intValue(); 567 | try { if (json.containsKey("failureTime")) failureTime = dateTimeFormat().parse((String) json.get("failureTime")); } 568 | catch (ParseException e) { throw new IllegalStateException(e); } 569 | } 570 | 571 | @SuppressWarnings("unchecked") 572 | public JSONObject toJson() { 573 | JSONObject json = new JSONObject(); 574 | 575 | json.put("delay", "" + delay); 576 | json.put("maxDelay", "" + maxDelay); 577 | if (maxTries != null) json.put("maxTries", maxTries); 578 | 579 | if (failures != 0) json.put("failures", failures); 580 | if (failureTime != null) json.put("failureTime", dateTimeFormat().format(failureTime)); 581 | 582 | return json; 583 | } 584 | } 585 | } 586 | -------------------------------------------------------------------------------- /src/java/net/elodina/mesos/hdfs/NodeCli.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import joptsimple.OptionException; 4 | import joptsimple.OptionParser; 5 | import joptsimple.OptionSet; 6 | import net.elodina.mesos.util.Strings; 7 | import org.json.simple.JSONArray; 8 | import org.json.simple.JSONObject; 9 | 10 | import java.io.IOException; 11 | import java.text.SimpleDateFormat; 12 | import java.util.*; 13 | 14 | import static net.elodina.mesos.hdfs.Cli.Error; 15 | import static net.elodina.mesos.hdfs.Cli.*; 16 | 17 | public class NodeCli { 18 | public static void handle(List args, boolean help) { 19 | if (help) { 20 | handleHelp(args); 21 | return; 22 | } 23 | 24 | if (args.isEmpty()) throw new Error("command required"); 25 | String cmd = args.remove(0); 26 | 27 | switch (cmd) { 28 | case "list": handleList(args, false); break; 29 | case "add": case "update": handleAddUpdate(cmd, args, false); break; 30 | case "start": case "stop": handleStartStop(cmd, args, false); break; 31 | case "remove": handleRemove(args, false); break; 32 | default: throw new Error("unsupported command " + cmd); 33 | } 34 | } 35 | 36 | private static void handleHelp(List args) { 37 | String cmd = args.isEmpty() ? 
null : args.remove(0);
38 | 
39 | if (cmd == null) {
40 | printLine("Node management commands\nUsage: node <cmd>\n");
41 | printCmds();
42 | 
43 | printLine();
44 | printLine("Run `help node <cmd>` to see details of a specific command");
45 | return;
46 | }
47 | 
48 | switch (cmd) {
49 | case "list": handleList(args, true); break;
50 | case "add": case "update": handleAddUpdate(cmd, args, true); break;
51 | case "start": case "stop": handleStartStop(cmd, args, true); break;
52 | case "remove": handleRemove(args, true); break;
53 | default: throw new Error("unsupported command " + cmd);
54 | }
55 | }
56 | 
57 | private static void handleList(List<String> args, boolean help) {
58 | if (help) {
59 | printLine("List nodes\nUsage: node list [<expr>]\n");
60 | handleGenericOptions(null, true);
61 | return;
62 | }
63 | 
64 | String expr = !args.isEmpty() ? args.remove(0) : null;
65 | 
66 | Map<String, String> params = new HashMap<>();
67 | if (expr != null) params.put("node", expr);
68 | 
69 | JSONArray json;
70 | try { json = sendRequest("/node/list", params); }
71 | catch (IOException e) { throw new Error("" + e); }
72 | 
73 | List<Node> nodes = Node.fromJsonArray(json);
74 | String title = nodes.isEmpty() ? "no nodes" : "node" + (nodes.size() > 1 ? "s" : "") + ":";
75 | printLine(title);
76 | 
77 | for (Node node : nodes) {
78 | printNode(node, 1);
79 | printLine();
80 | }
81 | }
82 | 
83 | private static void handleAddUpdate(String cmd, List<String> args, boolean help) {
84 | OptionParser parser = new OptionParser();
85 | if (cmd.equals("add")) parser.accepts("type", "node type (namenode, datanode).").withRequiredArg().required().ofType(String.class);
86 | 
87 | parser.accepts("cpus", "CPU amount (0.5, 1, 2).").withRequiredArg().ofType(Double.class);
88 | parser.accepts("mem", "Mem amount in Mb.").withRequiredArg().ofType(Long.class);
89 | 
90 | parser.accepts("constraints", "Node constraints (hostname=like:master,rack=like:1.*).").withRequiredArg();
91 | 
92 | parser.accepts("executor-jvm-opts", "Executor JVM options.").withRequiredArg().ofType(String.class);
93 | parser.accepts("hadoop-jvm-opts", "Hadoop JVM options.").withRequiredArg().ofType(String.class);
94 | 
95 | parser.accepts("core-site-opts", "Hadoop core-site.xml options.").withRequiredArg().ofType(String.class);
96 | parser.accepts("hdfs-site-opts", "Hadoop hdfs-site.xml options.").withRequiredArg().ofType(String.class);
97 | 
98 | parser.accepts("external-fs-uri", "FS URI of external namenode. If defined, this node becomes external.").withRequiredArg().ofType(String.class);
99 | 
100 | parser.accepts("failover-delay", "failover delay (10s, 5m, 3h).").withRequiredArg().ofType(String.class);
101 | parser.accepts("failover-max-delay", "max failover delay. See failover-delay.").withRequiredArg().ofType(String.class);
102 | parser.accepts("failover-max-tries", "max failover tries. 
Default - none").withRequiredArg().ofType(String.class); 103 | 104 | if (help) { 105 | printLine(Strings.capitalize(cmd) + " node \nUsage: node " + cmd + " [options]\n"); 106 | try { parser.printHelpOn(out); } 107 | catch (IOException ignore) {} 108 | 109 | printLine(); 110 | handleGenericOptions(args, true); 111 | return; 112 | } 113 | 114 | if (args.isEmpty()) throw new Error("id required"); 115 | String expr = args.remove(0); 116 | 117 | OptionSet options; 118 | try { options = parser.parse(args.toArray(new String[args.size()])); } 119 | catch (OptionException e) { 120 | try { parser.printHelpOn(out); } 121 | catch (IOException ignore) {} 122 | 123 | printLine(); 124 | throw new Error(e.getMessage()); 125 | } 126 | 127 | String type = (String) options.valueOf("type"); 128 | Double cpus = (Double) options.valueOf("cpus"); 129 | Long mem = (Long) options.valueOf("mem"); 130 | 131 | String constraints = (String) options.valueOf("constraints"); 132 | 133 | String executorJvmOpts = (String) options.valueOf("executor-jvm-opts"); 134 | String hadoopJvmOpts = (String) options.valueOf("hadoop-jvm-opts"); 135 | 136 | String coreSiteOpts = (String) options.valueOf("core-site-opts"); 137 | String hdfsSiteOpts = (String) options.valueOf("hdfs-site-opts"); 138 | 139 | String externalFsUri = (String) options.valueOf("external-fs-uri"); 140 | 141 | String failoverDelay = (String) options.valueOf("failover-delay"); 142 | String failoverMaxDelay = (String) options.valueOf("failover-max-delay"); 143 | String failoverMaxTries = (String) options.valueOf("failover-max-tries"); 144 | 145 | Map params = new HashMap<>(); 146 | params.put("node", expr); 147 | 148 | if (type != null) params.put("type", type); 149 | if (cpus != null) params.put("cpus", "" + cpus); 150 | if (mem != null) params.put("mem", "" + mem); 151 | 152 | if (constraints != null) params.put("constraints", constraints); 153 | 154 | if (executorJvmOpts != null) params.put("executorJvmOpts", executorJvmOpts); 155 | if (hadoopJvmOpts != null) params.put("hadoopJvmOpts", hadoopJvmOpts); 156 | 157 | if (coreSiteOpts != null) params.put("coreSiteOpts", coreSiteOpts); 158 | if (hdfsSiteOpts != null) params.put("hdfsSiteOpts", hdfsSiteOpts); 159 | 160 | if (externalFsUri != null) params.put("externalFsUri", externalFsUri); 161 | 162 | if (failoverDelay != null) params.put("failoverDelay", failoverDelay); 163 | if (failoverMaxDelay != null) params.put("failoverMaxDelay", failoverMaxDelay); 164 | if (failoverMaxTries != null) params.put("failoverMaxTries", failoverMaxTries); 165 | 166 | JSONArray json; 167 | try { json = sendRequest("/node/" + cmd, params); } 168 | catch (IOException e) { throw new Error("" + e); } 169 | 170 | List nodes = Node.fromJsonArray(json); 171 | String title = "node" + (nodes.size() > 1 ? "s" : "") + (cmd.equals("add") ? " added" : " updated") + ":"; 172 | printLine(title); 173 | 174 | for (Node node : nodes) { 175 | printNode(node, 1); 176 | printLine(); 177 | } 178 | } 179 | 180 | private static void handleStartStop(String cmd, List args, boolean help) { 181 | OptionParser parser = new OptionParser(); 182 | parser.accepts("timeout", "timeout (30s, 1m, 1h). 
0s - no timeout").withRequiredArg().ofType(String.class); 183 | 184 | if (help) { 185 | printLine(Strings.capitalize(cmd) + " node \nUsage: node " + cmd + " [options]\n"); 186 | try { parser.printHelpOn(out); } 187 | catch (IOException ignore) {} 188 | 189 | printLine(); 190 | handleGenericOptions(args, true); 191 | return; 192 | } 193 | 194 | if (args.isEmpty()) throw new Error("id required"); 195 | String expr = args.remove(0); 196 | 197 | OptionSet options; 198 | try { options = parser.parse(args.toArray(new String[args.size()])); } 199 | catch (OptionException e) { 200 | try { parser.printHelpOn(out); } 201 | catch (IOException ignore) {} 202 | 203 | printLine(); 204 | throw new Error(e.getMessage()); 205 | } 206 | 207 | String timeout = (String) options.valueOf("timeout"); 208 | Boolean force = (Boolean) options.valueOf("force"); 209 | 210 | HashMap params = new HashMap<>(); 211 | params.put("node", expr); 212 | if (timeout != null) params.put("timeout", timeout); 213 | if (force != null) params.put("force", "" + force); 214 | 215 | JSONObject json; 216 | try { json = sendRequest("/node/" + cmd, params); } 217 | catch (IOException e) { throw new Error("" + e); } 218 | 219 | String status = "" + json.get("status"); 220 | List nodes = Node.fromJsonArray((JSONArray) json.get("nodes")); 221 | 222 | String title = nodes.size() > 1 ? "nodes " : "node "; 223 | switch (status) { 224 | case "started": case "stopped": title += status + ":"; break; 225 | case "scheduled": title += status + " to " + cmd + ":"; break; 226 | case "timeout": throw new Error(cmd + " timeout"); 227 | } 228 | 229 | printLine(title); 230 | for (Node node : nodes) { 231 | printNode(node, 1); 232 | printLine(); 233 | } 234 | } 235 | 236 | private static void handleRemove(List args, boolean help) { 237 | if (help) { 238 | printLine("Remove node\nUsage: node remove \n"); 239 | handleGenericOptions(null, true); 240 | return; 241 | } 242 | 243 | if (args.isEmpty()) throw new Error("id required"); 244 | String expr = args.remove(0); 245 | 246 | JSONArray json; 247 | try { json = sendRequest("/node/remove", Collections.singletonMap("node", expr)); } 248 | catch (IOException e) { throw new Error("" + e); } 249 | 250 | String title = json.size() == 1 ? "node " + json.get(0) : "nodes " + Strings.join(json, ", "); 251 | title += " removed"; 252 | 253 | printLine(title); 254 | } 255 | 256 | private static void printNode(Node node, int indent) { 257 | printLine("id: " + node.id, indent); 258 | printLine("type: " + node.type.name().toLowerCase() + (node.isExternal() ? 
" (external)" : ""), indent); 259 | 260 | if (node.isExternal()) { 261 | printLine("external-fs-uri: " + node.externalFsUri, indent); 262 | return; 263 | } 264 | 265 | printLine("state: " + nodeState(node), indent); 266 | printLine("resources: " + nodeResources(node), indent); 267 | 268 | if (!node.constraints.isEmpty()) printLine("constraints: " + Strings.formatMap(node.constraints), indent); 269 | 270 | if (node.executorJvmOpts != null) printLine("executor-jvm-opts: " + node.executorJvmOpts, indent); 271 | if (node.hadoopJvmOpts != null) printLine("hadoop-jvm-opts: " + node.hadoopJvmOpts, indent); 272 | 273 | if (!node.coreSiteOpts.isEmpty()) printLine("core-site-opts: " + Strings.formatMap(node.coreSiteOpts), indent); 274 | if (!node.hdfsSiteOpts.isEmpty()) printLine("hdfs-site-opts: " + Strings.formatMap(node.hdfsSiteOpts), indent); 275 | 276 | printLine("stickiness: " + nodeStickiness(node.stickiness), indent); 277 | printLine("failover: " + nodeFailover(node.failover), indent); 278 | if (node.reservation != null) printLine("reservation: " + nodeReservation(node.reservation), indent); 279 | if (node.runtime != null) printNodeRuntime(node.runtime, indent); 280 | } 281 | 282 | private static void printNodeRuntime(Node.Runtime runtime, int indent) { 283 | printLine("runtime:", indent); 284 | printLine("task: " + runtime.taskId, indent + 1); 285 | printLine("executor: " + runtime.executorId, indent + 1); 286 | printLine("slave: " + runtime.slaveId + " (" + runtime.hostname + ")", indent + 1); 287 | } 288 | 289 | private static void printCmds() { 290 | printLine("Commands:"); 291 | printLine("list - list nodes", 1); 292 | printLine("add - add node", 1); 293 | printLine("update - update node", 1); 294 | printLine("start - start node", 1); 295 | printLine("stop - stop node", 1); 296 | printLine("remove - remove node", 1); 297 | } 298 | 299 | private static String nodeState(Node node) { 300 | if (node.state != Node.State.STARTING) return "" + node.state.name().toLowerCase(); 301 | 302 | if (node.failover.isWaitingDelay(new Date())) { 303 | String s = "failed " + node.failover.failures; 304 | if (node.failover.maxTries != null) s += "/" + node.failover.maxTries; 305 | s += " " + time(node.failover.failureTime); 306 | s += ", next start " + time(node.failover.delayExpires()); 307 | return s; 308 | } 309 | 310 | if (node.failover.failures > 0) { 311 | String s = "starting " + (node.failover.failures + 1); 312 | if (node.failover.maxTries != null) s += "/" + node.failover.maxTries; 313 | s += ", failed " + time(node.failover.failureTime); 314 | return s; 315 | } 316 | 317 | return "" + Node.State.STARTING.name().toLowerCase(); 318 | } 319 | 320 | private static String nodeStickiness(Node.Stickiness stickiness) { 321 | String s = "period:" + stickiness.period; 322 | 323 | if (stickiness.hostname != null) s += ", hostname:" + stickiness.hostname; 324 | if (stickiness.stopTime != null) s += ", expires:" + time(stickiness.expires()); 325 | 326 | return s; 327 | } 328 | 329 | private static String nodeFailover(Node.Failover failover) { 330 | String s = "delay:" + failover.delay; 331 | 332 | s += ", max-delay:" + failover.maxDelay; 333 | if (failover.maxTries != null) s += ", max-tries:" + failover.maxTries; 334 | 335 | return s; 336 | } 337 | 338 | private static String nodeResources(Node node) { 339 | String s = ""; 340 | 341 | s += "cpus:" + node.cpus; 342 | s += ", mem:" + node.mem; 343 | 344 | return s; 345 | } 346 | 347 | private static String nodeReservation(Node.Reservation reservation) { 348 | 
String s = ""; 349 | 350 | s += "cpus:" + reservation.cpus; 351 | s += ", mem:" + reservation.mem; 352 | s += ", ports:" + Strings.formatMap(reservation.ports); 353 | 354 | return s; 355 | } 356 | 357 | public static String dateTime(Date date) { 358 | return new SimpleDateFormat("yyyy-MM-dd HH:mm:ssX").format(date); 359 | } 360 | 361 | public static String time(Date date) { 362 | return new SimpleDateFormat("HH:mm:ssX").format(date); 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /src/java/net/elodina/mesos/hdfs/Nodes.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import org.json.simple.JSONArray; 4 | import org.json.simple.JSONObject; 5 | 6 | import java.util.ArrayList; 7 | import java.util.Collections; 8 | import java.util.List; 9 | 10 | public class Nodes { 11 | public static Storage storage = Storage.byUri(Scheduler.$.config.storage); 12 | 13 | public static String frameworkId; 14 | private static List nodes = new ArrayList<>(); 15 | 16 | private Nodes() {} 17 | 18 | public static List getNodes() { return Collections.unmodifiableList(nodes); } 19 | 20 | public static List getNodes(Node.State state) { 21 | List nodes = new ArrayList<>(); 22 | for (Node node : getNodes()) if (node.state == state) nodes.add(node); 23 | return nodes; 24 | } 25 | 26 | public static List getNodes(Node.Type type) { 27 | List nodes = new ArrayList<>(); 28 | for (Node node : getNodes()) if (node.type == type) nodes.add(node); 29 | return nodes; 30 | } 31 | 32 | public static List getNodes(List ids) { 33 | List nodes = new ArrayList<>(); 34 | 35 | for (String id : ids) { 36 | Node node = getNode(id); 37 | if (node != null) nodes.add(node); 38 | } 39 | 40 | return nodes; 41 | } 42 | 43 | public static Node getNode(String id) { 44 | for (Node node : nodes) 45 | if (node.id.equals(id)) return node; 46 | 47 | return null; 48 | } 49 | 50 | /* 51 | Expands expr. 
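Usage sketch (node ids illustrative): expandExpr("nn,dn0..2") returns [nn, dn0, dn1, dn2].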
Examples:
52 | - nn, dn0, dn3 -> nn, dn0, dn3
53 | - dn* (dn0, dn1, dn2 exist) -> dn0, dn1, dn2
54 | - 0..3 -> 0, 1, 2, 3
55 | - dn1..3 -> dn1, dn2, dn3
56 | */
57 | public static List<String> expandExpr(String expr) {
58 | List<String> ids = new ArrayList<>();
59 | 
60 | for (String part : expr.split(",")) {
61 | part = part.trim();
62 | 
63 | if (part.endsWith("*")) ids.addAll(expandWildcard(part));
64 | else if (part.contains("..")) ids.addAll(expandRange(part));
65 | else ids.add(part);
66 | }
67 | 
68 | return ids;
69 | }
70 | 
71 | private static List<String> expandWildcard(String expr) {
72 | List<String> ids = new ArrayList<>();
73 | 
74 | String prefix = expr.substring(0, expr.length() - 1);
75 | for (Node node : getNodes())
76 | if (node.id.startsWith(prefix)) ids.add(node.id);
77 | 
78 | return ids;
79 | }
80 | 
81 | private static List<String> expandRange(String expr) {
82 | // dn0..5
83 | int rangeIdx = expr.indexOf("..");
84 | 
85 | int startIdx = rangeIdx - 1;
86 | //noinspection StatementWithEmptyBody
87 | for (char[] chars = expr.toCharArray(); startIdx >= 0 && Character.isDigit(chars[startIdx]); startIdx--);
88 | startIdx++;
89 | 
90 | String prefix = expr.substring(0, startIdx);
91 | int start, end;
92 | try {
93 | start = Integer.parseInt(expr.substring(startIdx, rangeIdx));
94 | end = Integer.parseInt(expr.substring(rangeIdx + 2));
95 | } catch (NumberFormatException e) { throw new IllegalArgumentException(expr); }
96 | 
97 | List<String> ids = new ArrayList<>();
98 | for (int i = start; i <= end; i++)
99 | ids.add(prefix + i);
100 | 
101 | return ids;
102 | }
103 | 
104 | public static Node addNode(Node node) {
105 | if (getNode(node.id) != null) throw new IllegalArgumentException("duplicate node");
106 | 
107 | if (node.type == Node.Type.NAMENODE && !getNodes(Node.Type.NAMENODE).isEmpty())
108 | throw new IllegalArgumentException("second namenode is not supported");
109 | 
110 | nodes.add(node);
111 | return node;
112 | }
113 | 
114 | public static void removeNode(Node node) {
115 | nodes.remove(node);
116 | }
117 | 
118 | public static void reset() {
119 | frameworkId = null;
120 | nodes.clear();
121 | }
122 | 
123 | public static void save() { storage.save(); }
124 | public static void load() { storage.load(); }
125 | 
126 | @SuppressWarnings("unchecked")
127 | public static JSONObject toJson() {
128 | JSONObject json = new JSONObject();
129 | 
130 | if (frameworkId != null) json.put("frameworkId", frameworkId);
131 | 
132 | JSONArray nodesJson = new JSONArray();
133 | for (Node node : nodes) nodesJson.add(node.toJson());
134 | if (!nodesJson.isEmpty()) json.put("nodes", nodesJson);
135 | 
136 | return json;
137 | }
138 | 
139 | @SuppressWarnings({"RedundantCast", "unchecked"})
140 | public static void fromJson(JSONObject json) {
141 | if (json.containsKey("frameworkId")) frameworkId = (String) json.get("frameworkId");
142 | 
143 | nodes.clear();
144 | if (json.containsKey("nodes"))
145 | nodes.addAll(Node.fromJsonArray((JSONArray) json.get("nodes")));
146 | }
147 | }
148 | 
--------------------------------------------------------------------------------
/src/java/net/elodina/mesos/hdfs/Scheduler.java:
--------------------------------------------------------------------------------
1 | package net.elodina.mesos.hdfs;
2 | 
3 | import net.elodina.mesos.api.*;
4 | import net.elodina.mesos.api.driver.SchedulerDriver;
5 | import net.elodina.mesos.api.driver.SchedulerDriverV0;
6 | import net.elodina.mesos.api.driver.SchedulerDriverV1;
7 | import net.elodina.mesos.util.IO;
8 | import net.elodina.mesos.util.Period;
9 | import 
net.elodina.mesos.util.Strings; 10 | import net.elodina.mesos.util.Version; 11 | import org.apache.log4j.*; 12 | 13 | import java.io.File; 14 | import java.net.URI; 15 | import java.net.URISyntaxException; 16 | import java.text.SimpleDateFormat; 17 | import java.util.*; 18 | 19 | import static net.elodina.mesos.api.Message.shortId; 20 | 21 | public class Scheduler implements net.elodina.mesos.api.Scheduler { 22 | public static final Scheduler $ = new Scheduler(); 23 | private static final Logger logger = Logger.getLogger(Scheduler.class); 24 | 25 | public Config config = new Config(); 26 | private Reconciler reconciler = new Reconciler(); 27 | 28 | private SchedulerDriver driver; 29 | 30 | @Override 31 | public void subscribed(SchedulerDriver driver, String id, Master master) { 32 | logger.info("[subscribed] framework:" + shortId(id) + (master != null ? ", master:[" + master.toString(true) + "]" : "")); 33 | this.driver = driver; 34 | 35 | checkMesosVersion(master); 36 | reconciler.start(driver, new Date()); 37 | 38 | Nodes.frameworkId = id; 39 | Nodes.save(); 40 | } 41 | 42 | @Override 43 | public void offers(List offers) { 44 | String s = ""; 45 | for (Offer offer : offers) s += "\n" + offer.toString(true); 46 | logger.info("[offers]:" + s); 47 | onOffers(offers); 48 | } 49 | 50 | @Override 51 | public void status(Task.Status status) { 52 | logger.info("[status] " + status.toString(true)); 53 | onTaskStatus(status); 54 | } 55 | 56 | @Override 57 | public void message(String executorId, String slaveId, byte[] data) { 58 | logger.info("[message] executor:" + shortId(executorId) + ", slave: " + shortId(slaveId) + ", data: " + new String(data)); 59 | } 60 | 61 | @Override 62 | public void disconnected() { 63 | logger.info("[disconnected]"); 64 | this.driver = null; 65 | } 66 | 67 | private void onOffers(List offers) { 68 | // start nodes 69 | for (Offer offer : offers) { 70 | String reason = acceptOffer(offer); 71 | 72 | if (reason != null) { 73 | logger.info("Declined offer " + shortId(offer.id()) + ": " + reason); 74 | driver.declineOffer(offer.id()); 75 | } 76 | } 77 | 78 | // stop nodes 79 | for (Node node : Nodes.getNodes(Node.State.STOPPING)) { 80 | if (node.runtime == null) { 81 | node.state = Node.State.IDLE; 82 | continue; 83 | } 84 | 85 | if (!node.runtime.killSent) { 86 | driver.killTask(node.runtime.taskId); 87 | node.runtime.killSent = true; 88 | } 89 | } 90 | 91 | reconciler.proceed(driver, new Date()); 92 | Nodes.save(); 93 | } 94 | 95 | String acceptOffer(Offer offer) { 96 | if (reconciler.isActive()) return "reconciling"; 97 | 98 | List nodes = new ArrayList<>(); 99 | for (Node node : Nodes.getNodes(Node.State.STARTING)) 100 | if (node.runtime == null && !node.failover.isWaitingDelay(new Date())) 101 | nodes.add(node); 102 | 103 | if (nodes.isEmpty()) return "nothing to start"; 104 | 105 | List reasons = new ArrayList<>(); 106 | for (Node node : nodes) { 107 | String reason = node.matches(offer, otherAttributes()); 108 | if (reason != null) reasons.add("node " + node.id + ": " + reason); 109 | else { 110 | launchTask(node, offer); 111 | return null; 112 | } 113 | } 114 | 115 | return Strings.join(reasons, ", "); 116 | } 117 | 118 | void launchTask(Node node, Offer offer) { 119 | node.initRuntime(offer); 120 | Task task = node.newTask(); 121 | 122 | driver.launchTask(offer.id(), task); 123 | logger.info("Starting node " + node.id + " with task " + shortId(node.runtime.taskId) + " with offer " + shortId(offer.id())); 124 | } 125 | 126 | void onTaskStatus(Task.Status status) { 
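// Dispatch below: RUNNING marks the node as started; FINISHED/FAILED/KILLED/
// LOST/ERROR are treated as terminal and go through stop/failover handling.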
127 | Node node = getNodeByTaskId(status.id()); 128 | 129 | switch (status.state()) { 130 | case RUNNING: 131 | onTaskStarted(node, status); 132 | break; 133 | case FINISHED: 134 | case FAILED: 135 | case KILLED: 136 | case LOST: 137 | case ERROR: 138 | onTaskStopped(node, status); 139 | } 140 | } 141 | 142 | void onTaskStarted(Node node, Task.Status status) { 143 | boolean expectedState = node != null && Arrays.asList(Node.State.STARTING, Node.State.RUNNING, Node.State.RECONCILING).contains(node.state); 144 | if (!expectedState) { 145 | String id = node != null ? node.id : ""; 146 | logger.info("Got " + status.state() + " for node " + id + ", killing task"); 147 | driver.killTask(status.id()); 148 | return; 149 | } 150 | 151 | if (node.state == Node.State.RECONCILING) 152 | logger.info("Finished reconciling of node " + node.id + ", task " + shortId(node.runtime.taskId)); 153 | 154 | node.state = Node.State.RUNNING; 155 | node.registerStart(node.runtime.hostname); 156 | } 157 | 158 | void onTaskStopped(Node node, Task.Status status) { 159 | boolean expectedState = node != null && node.state != Node.State.IDLE; 160 | if (!expectedState) { 161 | String id = node != null ? node.id : ""; 162 | logger.info("Got " + status.state() + " for node " + id + ", ignoring it"); 163 | return; 164 | } 165 | 166 | boolean stopping = node.state == Node.State.STOPPING; 167 | 168 | boolean failed = !stopping && status.state() != Task.State.FINISHED && status.state() != Task.State.KILLED; 169 | node.registerStop(new Date(), failed); 170 | 171 | if (failed) { 172 | if (node.failover.isMaxTriesExceeded()) stopping = true; 173 | logger.info(failureMessage(node)); 174 | } 175 | 176 | node.state = stopping ? Node.State.IDLE : Node.State.STARTING; 177 | node.runtime = null; 178 | node.reservation = null; 179 | } 180 | 181 | private String failureMessage(Node node) { 182 | String s = "Node " + node.id + " failed " + node.failover.failures; 183 | if (node.failover.maxTries != null) s += "/" + node.failover.maxTries; 184 | 185 | if (!node.failover.isMaxTriesExceeded()) { 186 | SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm:ssX"); 187 | s += ", waiting " + node.failover.currentDelay(); 188 | s += ", next start ~ " + timeFormat.format(node.failover.delayExpires()); 189 | } else { 190 | s += ", failure limit exceeded"; 191 | s += ", stopping node"; 192 | } 193 | 194 | return s; 195 | } 196 | 197 | private Node getNodeByTaskId(String taskId) { 198 | for (Node node : Nodes.getNodes()) 199 | if (node.runtime != null && node.runtime.taskId.equals(taskId)) 200 | return node; 201 | 202 | return null; 203 | } 204 | 205 | Map> otherAttributes() { 206 | class Result { 207 | Map> map = new HashMap<>(); 208 | void add(String name, String value) { 209 | if (!map.containsKey(name)) map.put(name, new ArrayList()); 210 | map.get(name).add(value); 211 | } 212 | } 213 | Result result = new Result(); 214 | 215 | for (Node node : Nodes.getNodes()) { 216 | if (node.runtime == null) continue; 217 | 218 | result.add("hostname", node.runtime.hostname); 219 | for (String name : node.runtime.attributes.keySet()) 220 | result.add(name, node.runtime.attributes.get(name)); 221 | } 222 | 223 | return result.map; 224 | } 225 | 226 | void checkMesosVersion(Master master) { 227 | if (master == null) return; 228 | Version minVersion = new Version("0.23.0"); 229 | Version version = master.version(); 230 | 231 | if (version == null || version.compareTo(minVersion) < 0) { 232 | String versionStr = version == null ? 
"?(<0.23.0)" : "" + version; 233 | logger.fatal("Unsupported Mesos version " + versionStr + ", expected version " + minVersion + "+"); 234 | driver.stop(); 235 | } 236 | } 237 | 238 | public void run() { 239 | initLogging(); 240 | config.resolveDeps(); 241 | 242 | logger.info("Starting " + getClass().getSimpleName() + ":\n" + config); 243 | Nodes.load(); 244 | 245 | final HttpServer server = new HttpServer(); 246 | try { server.start(); } 247 | catch (Exception e) { throw new RuntimeException(e); } 248 | 249 | Framework framework = new Framework(); 250 | 251 | if (Nodes.frameworkId != null) framework.id(Nodes.frameworkId); 252 | framework.user(config.user != null ? config.user : ""); 253 | 254 | framework.name(config.frameworkName); 255 | framework.role(config.frameworkRole); 256 | framework.timeout(config.frameworkTimeout); 257 | framework.checkpoint(true); 258 | 259 | Cred cred = null; 260 | if (config.principal != null && config.secret != null) { 261 | framework.principal(config.principal); 262 | cred = new Cred(config.principal, config.secret); 263 | } 264 | 265 | SchedulerDriver driver = config.driverV1() 266 | ? new SchedulerDriverV1(Scheduler.$, framework, config.master) 267 | : new SchedulerDriverV0(Scheduler.$, framework, config.master, cred); 268 | 269 | Runtime.getRuntime().addShutdownHook(new Thread() { 270 | public void run() { 271 | logger.info("Stopping " + getClass().getSimpleName()); 272 | 273 | try { server.stop(); } 274 | catch (Exception e) { logger.warn("", e); } 275 | } 276 | }); 277 | 278 | boolean stopped; 279 | try { stopped = driver.run(); } 280 | catch (Exception e) { throw new Error(e); } 281 | System.exit(stopped ? 0 : 1); 282 | } 283 | 284 | void initLogging() { 285 | System.setProperty("org.eclipse.jetty.util.log.class", JettyLog4jLogger.class.getName()); 286 | BasicConfigurator.resetConfiguration(); 287 | 288 | Logger root = Logger.getRootLogger(); 289 | root.setLevel(Level.INFO); 290 | 291 | Logger.getLogger("net.elodina.mesos.api").setLevel(config.debug ? Level.DEBUG : Level.INFO); 292 | 293 | Logger.getLogger("org.eclipse.jetty").setLevel(Level.WARN); 294 | Logger.getLogger("org.apache.zookeeper").setLevel(Level.WARN); 295 | Logger.getLogger("org.I0Itec.zkclient").setLevel(Level.WARN); 296 | 297 | PatternLayout layout = new PatternLayout("%d [%t] %p %c{2} - %m%n"); 298 | root.addAppender(new ConsoleAppender(layout)); 299 | } 300 | 301 | public static class JettyLog4jLogger implements org.eclipse.jetty.util.log.Logger { 302 | private Logger logger; 303 | 304 | @SuppressWarnings("UnusedDeclaration") 305 | public JettyLog4jLogger() { this.logger = Logger.getLogger("Jetty"); } 306 | public JettyLog4jLogger(Logger logger) { this.logger = logger; } 307 | 308 | public boolean isDebugEnabled() { return logger.isDebugEnabled(); } 309 | public void setDebugEnabled(boolean enabled) { logger.setLevel(enabled ? Level.DEBUG : Level.INFO); } 310 | 311 | public void info(String s, Object... args) { logger.info(format(s, args)); } 312 | public void info(String s, Throwable t) { logger.info(s, t); } 313 | public void info(Throwable t) { logger.info("", t); } 314 | 315 | public void debug(String s, Object... args) { logger.debug(format(s, args)); } 316 | public void debug(String msg, Throwable th) { logger.debug(msg, th); } 317 | public void debug(Throwable t) { logger.debug("", t); } 318 | 319 | public void warn(String s, Object... 
args) { logger.warn(format(s, args)); } 320 | public void warn(String msg, Throwable th) { logger.warn(msg, th); } 321 | public void warn(Throwable t) { logger.warn("", t); } 322 | 323 | public void ignore(Throwable throwable) { logger.info("Ignored", throwable); } 324 | 325 | public org.eclipse.jetty.util.log.Logger getLogger(String name) { return new JettyLog4jLogger(Logger.getLogger(name)); } 326 | public String getName() { return logger.getName(); } 327 | 328 | private static String format(String s, Object ... args) { 329 | // {} text {} text ... 330 | String result = ""; 331 | 332 | int i = 0; 333 | for (String text : s.split("\\{\\}")) { 334 | result += text; 335 | if (args.length > i) result += args[i]; 336 | i++; 337 | } 338 | 339 | return result; 340 | } 341 | } 342 | 343 | public static class Config { 344 | public boolean debug; 345 | public String api; 346 | public String storage = "file:hdfs-mesos.json"; 347 | 348 | public String jarMask = "hdfs-mesos-.*jar"; 349 | public String hadoopMask = "hadoop-.*gz"; 350 | public String jreMask; 351 | 352 | public File jar; 353 | public File hadoop; 354 | public File jre; 355 | 356 | public int apiPort() { 357 | try { 358 | int port = new URI(api).getPort(); 359 | return port == -1 ? 80 : port; 360 | } catch (URISyntaxException e) { 361 | throw new IllegalStateException(e); 362 | } 363 | } 364 | 365 | public String driver = "v0"; 366 | public String master; 367 | public String user; 368 | public String principal; 369 | public String secret; 370 | 371 | public String frameworkName = "hdfs"; 372 | public String frameworkRole = "*"; 373 | public Period frameworkTimeout = new Period("30d"); 374 | 375 | public boolean driverV1() { return driver.equals("v1"); } 376 | 377 | void resolveDeps() { 378 | hadoop = IO.findFile(new File("."), hadoopMask); 379 | if (hadoop == null) throw new Cli.Error(hadoopMask + " not found in current dir"); 380 | checkHadoopVersion(); 381 | 382 | jar = IO.findFile(new File("."), jarMask); 383 | if (jar == null) throw new Cli.Error(jarMask + " not found in current dir"); 384 | 385 | if (jreMask != null) { 386 | jre = IO.findFile(new File("."), jreMask); 387 | if (jre == null) throw new Cli.Error(jreMask + " not found in current dir"); 388 | } 389 | } 390 | 391 | private void checkHadoopVersion() { 392 | // hadoop-1.2.1.tar.gz 393 | String name = hadoop.getName(); 394 | int hyphenIdx = name.indexOf("-"); 395 | int extIdx = name.indexOf(".tar.gz"); 396 | 397 | if (hyphenIdx == -1 || extIdx == -1) throw new Cli.Error("Can't extract version from " + name); 398 | Version version = new Version(name.substring(hyphenIdx + 1, extIdx)); 399 | 400 | boolean supported1x = version.compareTo(new Version("1.2")) >= 0 && version.compareTo(new Version("1.3")) < 0; 401 | boolean supported2x = version.compareTo(new Version("2.7")) >= 0 && version.compareTo(new Version("2.8")) < 0; 402 | if (!supported1x && !supported2x) 403 | throw new Cli.Error("Supported hadoop versions are 1.2.x and 2.7.x, current is " + version); 404 | } 405 | 406 | public String toString() { 407 | String s = ""; 408 | 409 | s += "api: " + api; 410 | s += "\nfiles: jar:" + jar + ", hadoop:" + hadoop + (jre != null ? ", jre:" + jre : ""); 411 | 412 | s += "\nmesos: driver:" + driver + ", master:" + master + ", user:" + (user == null ? "" : user); 413 | s += ", principal:" + (principal == null ? "" : principal) + ", secret:" + (secret == null ? 
"" : "******"); 414 | 415 | s += "\nframework: name:" + frameworkName + ", role:" + frameworkRole + ", timeout:" + frameworkTimeout; 416 | 417 | return s; 418 | } 419 | } 420 | 421 | public static class Reconciler { 422 | private Period delay; 423 | private int maxTries; 424 | 425 | private int tries; 426 | private Date lastTry; 427 | 428 | public Reconciler() { this(new Period("30s"), 3); } 429 | 430 | public Reconciler(Period delay, int maxTries) { 431 | this.delay = delay; 432 | this.maxTries = maxTries; 433 | } 434 | 435 | public Period getDelay() { return delay; } 436 | public int getMaxTries() { return maxTries; } 437 | 438 | public int getTries() { return tries; } 439 | public Date getLastTry() { return lastTry; } 440 | 441 | public boolean isActive() { return Nodes.getNodes(Node.State.RECONCILING).size() > 0; } 442 | 443 | public void start(SchedulerDriver driver, Date now) { 444 | tries = 1; 445 | lastTry = now; 446 | 447 | for (Node node : Nodes.getNodes()) { 448 | if (node.runtime == null) continue; 449 | 450 | node.state = Node.State.RECONCILING; 451 | logger.info("Reconciling " + tries + "/" + maxTries + " state of node " + node.id + ", task " + shortId(node.runtime.taskId)); 452 | } 453 | 454 | driver.reconcileTasks(Collections.emptyList()); 455 | } 456 | 457 | public void proceed(SchedulerDriver driver, Date now) { 458 | if (lastTry == null) return; 459 | 460 | if (now.getTime() - lastTry.getTime() < delay.ms()) 461 | return; 462 | 463 | tries += 1; 464 | lastTry = now; 465 | 466 | if (tries > maxTries) { 467 | for (Node node : Nodes.getNodes(Node.State.RECONCILING)) { 468 | if (node.runtime == null) continue; 469 | 470 | logger.info("Reconciling exceeded " + maxTries + " tries for node " + node.id + ", sending killTask for task " + shortId(node.runtime.taskId)); 471 | driver.killTask(node.runtime.taskId); 472 | node.runtime = null; 473 | node.state = Node.State.STARTING; 474 | } 475 | 476 | tries = 0; 477 | lastTry = null; 478 | return; 479 | } 480 | 481 | List ids = new ArrayList<>(); 482 | 483 | for (Node node : Nodes.getNodes(Node.State.RECONCILING)) { 484 | if (node.runtime == null) continue; 485 | logger.info("Reconciling " + tries + "/" + maxTries + " state of node " + node.id + ", task " + shortId(node.runtime.taskId)); 486 | ids.add(node.runtime.taskId); 487 | } 488 | 489 | if (!ids.isEmpty()) driver.reconcileTasks(ids); 490 | } 491 | } 492 | } 493 | -------------------------------------------------------------------------------- /src/java/net/elodina/mesos/hdfs/SchedulerCli.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import joptsimple.OptionException; 4 | import joptsimple.OptionParser; 5 | import joptsimple.OptionSet; 6 | import net.elodina.mesos.util.Period; 7 | 8 | import java.io.*; 9 | import java.util.*; 10 | 11 | import static net.elodina.mesos.hdfs.Cli.Error; 12 | import static net.elodina.mesos.hdfs.Cli.*; 13 | 14 | public class SchedulerCli { 15 | public static void handle(List args, boolean help) { 16 | Scheduler.Config config = Scheduler.$.config; 17 | 18 | OptionParser parser = new OptionParser(); 19 | parser.accepts("api", "Binding host:port for http/artifact server.").withRequiredArg().ofType(String.class); 20 | parser.accepts("storage", " Storage for cluster state.\nDefault - " + config.storage + ".\nExamples:\n file:hdfs-mesos.json;\n zk:master:2181/hdfs-mesos;\n zk:m1:2181,m2:2181/hdfs-mesos;").withRequiredArg().ofType(String.class); 21 | 22 | 
parser.accepts("debug", "Enable debug logging. Default - false").withRequiredArg().ofType(Boolean.class); 23 | parser.accepts("driver", "Mesos driver version (v0, v1). Default - " + config.driver).withRequiredArg().ofType(String.class); 24 | parser.accepts("master", "Mesos Master address(es).").withRequiredArg().ofType(String.class); 25 | parser.accepts("user", "Mesos user. Default - none").withRequiredArg().ofType(String.class); 26 | parser.accepts("principal", "Principal (username) used to register framework.").withRequiredArg().ofType(String.class); 27 | parser.accepts("secret", "Secret (password) used to register framework.").withRequiredArg().ofType(String.class); 28 | 29 | parser.accepts("framework-name", "Framework name. Default - " + config.frameworkName + ".").withRequiredArg().ofType(String.class); 30 | parser.accepts("framework-role", "Framework role. Default - " + config.frameworkRole + ".").withRequiredArg().ofType(String.class); 31 | parser.accepts("framework-timeout", "Framework failover timeout. Default - " + config.frameworkTimeout + ".").withRequiredArg().ofType(String.class); 32 | 33 | parser.accepts("jar", "hdfs-mesos jar mask (hdfs-mesos-.*jar). Default - " + config.jarMask + ".").withRequiredArg().ofType(String.class); 34 | parser.accepts("hadoop", "Hadoop archive mask (hadoop-.*gz). Default - " + config.hadoopMask + ".").withRequiredArg().ofType(String.class); 35 | parser.accepts("jre", "JRE archive mask (jre*.zip). Default - none.").withRequiredArg().ofType(String.class); 36 | 37 | if (help) { 38 | printLine("Generic Options"); 39 | 40 | try { parser.printHelpOn(out); } 41 | catch (IOException ignore) {} 42 | 43 | return; 44 | } 45 | 46 | OptionSet options; 47 | try { options = parser.parse(args.toArray(new String[args.size()])); } 48 | catch (OptionException e) { 49 | try { parser.printHelpOn(out); } 50 | catch (IOException ignore) {} 51 | 52 | printLine(); 53 | throw new Error(e.getMessage()); 54 | } 55 | 56 | Map<String, String> defaults = defaults(); 57 | 58 | String api = (String) options.valueOf("api"); 59 | if (api == null) api = defaults.get("api"); 60 | if (api == null) throw new Error("api required"); 61 | 62 | String storage = (String) options.valueOf("storage"); 63 | if (storage == null) storage = defaults.get("storage"); 64 | if (storage != null) 65 | try { Storage.byUri(storage); } 66 | catch (IllegalArgumentException e) { throw new Error("invalid storage"); } 67 | 68 | Boolean debug = (Boolean) options.valueOf("debug"); 69 | if (debug == null && defaults.containsKey("debug")) debug = Boolean.valueOf(defaults.get("debug")); 70 | if (debug != null) config.debug = debug; 71 | 72 | String driver = (String) options.valueOf("driver"); 73 | if (driver == null) driver = defaults.get("driver"); 74 | if (driver != null && !Arrays.asList("v0", "v1").contains(driver)) 75 | throw new Error("invalid driver"); 76 | 77 | String master = (String) options.valueOf("master"); 78 | if (master == null) master = defaults.get("master"); 79 | if (master == null) throw new Error("master required"); 80 | 81 | String user = (String) options.valueOf("user"); 82 | if (user == null) user = defaults.get("user"); 83 | 84 | String principal = (String) options.valueOf("principal"); 85 | if (principal == null) principal = defaults.get("principal"); 86 | 87 | String secret = (String) options.valueOf("secret"); 88 | if (secret == null) secret = defaults.get("secret"); 89 | 90 | 91 | String frameworkName = (String) options.valueOf("framework-name"); 92 | if (frameworkName == null) frameworkName = 
defaults.get("framework-name"); 93 | 94 | String frameworkRole = (String) options.valueOf("framework-role"); 95 | if (frameworkRole == null) frameworkRole = defaults.get("framework-role"); 96 | 97 | String frameworkTimeout = (String) options.valueOf("framework-timeout"); 98 | if (frameworkTimeout == null) frameworkTimeout = defaults.get("framework-timeout"); 99 | if (frameworkTimeout != null) 100 | try { new Period(frameworkTimeout); } 101 | catch (IllegalArgumentException e) { throw new Error("invalid framework-timeout"); } 102 | 103 | 104 | String jar = (String) options.valueOf("jar"); 105 | if (jar == null) jar = defaults.get("jar"); 106 | 107 | String hadoop = (String) options.valueOf("hadoop"); 108 | if (hadoop == null) hadoop = defaults.get("hadoop"); 109 | 110 | String jre = (String) options.valueOf("jre"); 111 | if (jre == null) jre = defaults.get("jre"); 112 | 113 | config.api = api; 114 | if (storage != null) config.storage = storage; 115 | 116 | if (driver != null) config.driver = driver; 117 | config.master = master; 118 | config.user = user; 119 | config.principal = principal; 120 | config.secret = secret; 121 | 122 | if (frameworkName != null) config.frameworkName = frameworkName; 123 | if (frameworkRole != null) config.frameworkRole = frameworkRole; 124 | if (frameworkTimeout != null) config.frameworkTimeout = new Period(frameworkTimeout); 125 | 126 | if (jar != null) config.jarMask = jar; 127 | if (hadoop != null) config.hadoopMask = hadoop; 128 | if (jre != null) config.jreMask = jre; 129 | 130 | Scheduler.$.run(); 131 | } 132 | 133 | private static Map<String, String> defaults() { 134 | Map<String, String> defaults = new HashMap<>(); 135 | 136 | File file = new File("hdfs-mesos.properties"); 137 | if (!file.exists()) return defaults; 138 | 139 | Properties props = new Properties(); 140 | try (InputStream stream = new FileInputStream(file)) { props.load(stream); } 141 | catch (IOException e) { throw new IOError(e); } 142 | 143 | for (Object name : props.keySet()) 144 | defaults.put("" + name, props.getProperty("" + name)); 145 | return defaults; 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/java/net/elodina/mesos/hdfs/Storage.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import net.elodina.mesos.util.IO; 4 | import org.I0Itec.zkclient.ZkClient; 5 | import org.I0Itec.zkclient.serialize.BytesPushThroughSerializer; 6 | import org.json.simple.JSONObject; 7 | import org.json.simple.parser.JSONParser; 8 | import org.json.simple.parser.ParseException; 9 | 10 | import java.io.File; 11 | import java.io.IOError; 12 | import java.io.IOException; 13 | import java.nio.charset.Charset; 14 | 15 | public abstract class Storage { 16 | public static Storage file(File file) { return new FileStorage(file); } 17 | public static Storage zk(String zk) { return new ZkStorage(zk); } 18 | 19 | public static Storage byUri(String uri) { 20 | // zk:master:2181/hdfs-mesos, file:hdfs-mesos.json 21 | int colonIdx = uri.indexOf(":"); 22 | if (colonIdx == -1) throw new IllegalArgumentException(uri); 23 | 24 | String protocol = uri.substring(0, colonIdx); 25 | String value = uri.substring(colonIdx + 1); 26 | switch (protocol) { 27 | case "file": return new FileStorage(new File(value)); 28 | case "zk": return new ZkStorage(value); 29 | default: throw new IllegalArgumentException(uri); 30 | } 31 | } 32 | 33 | public abstract void save(); 34 | public abstract void load(); 35 | public abstract void 
clear(); 36 | 37 | private static class FileStorage extends Storage { 38 | private File file; 39 | private FileStorage(File file) { this.file = file; } 40 | 41 | @Override 42 | public void save() { 43 | JSONObject json = Nodes.toJson(); 44 | try { IO.writeFile(file, "" + json); } 45 | catch (IOException e) { throw new IOError(e); } 46 | } 47 | 48 | @Override 49 | public void load() { 50 | if (!file.exists()) return; 51 | 52 | JSONObject obj; 53 | try { obj = (JSONObject) new JSONParser().parse(IO.readFile(file)); } 54 | catch (ParseException | IOException e) { throw new IOError(e); } 55 | 56 | Nodes.fromJson(obj); 57 | } 58 | 59 | @Override 60 | public void clear() { 61 | if (!file.exists()) return; 62 | if (!file.delete()) throw new IOError(new IOException("failed to delete " + file)); 63 | } 64 | } 65 | 66 | private static class ZkStorage extends Storage { 67 | private String connect, path; 68 | 69 | private ZkStorage(String zk) { 70 | // master:2181/hdfs-mesos, master:2181,master2:2181/hdfs-mesos 71 | int slashIdx = zk.indexOf("/"); 72 | if (slashIdx == -1) throw new IllegalArgumentException(zk); 73 | 74 | connect = zk.substring(0, slashIdx); 75 | path = zk.substring(slashIdx); 76 | } 77 | 78 | private ZkClient client() { return new ZkClient(connect, 30000, 30000, new BytesPushThroughSerializer()); } 79 | 80 | @Override 81 | public void save() { 82 | ZkClient client = client(); 83 | try { 84 | client.createPersistent(path, true); 85 | client.writeData(path, Nodes.toJson().toString().getBytes(Charset.forName("utf-8"))); 86 | } finally { client.close(); } 87 | } 88 | 89 | @Override 90 | public void load() { 91 | ZkClient client = client(); 92 | try { 93 | byte[] bytes = client.readData(path, true); 94 | if (bytes == null) return; 95 | 96 | JSONObject json = (JSONObject) new JSONParser().parse(new String(bytes, Charset.forName("utf-8"))); 97 | Nodes.fromJson(json); 98 | } catch (ParseException e) { 99 | throw new IOError(e); 100 | } finally { 101 | client.close(); 102 | } 103 | } 104 | 105 | @Override 106 | public void clear() { 107 | ZkClient client = client(); 108 | try { client.delete(path); } 109 | finally { client.close(); } 110 | } 111 | } 112 | } 113 | 114 | 115 | -------------------------------------------------------------------------------- /src/test/net/elodina/mesos/hdfs/CliTest.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import net.elodina.mesos.util.Strings; 4 | import org.junit.After; 5 | import org.junit.Before; 6 | import org.junit.Test; 7 | 8 | import java.io.ByteArrayOutputStream; 9 | import java.io.PrintStream; 10 | import java.util.ArrayList; 11 | import java.util.Arrays; 12 | 13 | import static org.junit.Assert.*; 14 | 15 | public class CliTest extends HdfsMesosTestCase { 16 | private ByteArrayOutputStream out; 17 | private HttpServer server; 18 | 19 | @Before 20 | public void before() throws Exception { 21 | super.before(); 22 | 23 | out = new ByteArrayOutputStream(); 24 | Cli.out = new PrintStream(out, true); 25 | 26 | server = new HttpServer(); 27 | server.start(); 28 | } 29 | 30 | @After 31 | public void after() throws Exception { 32 | Cli.out = System.out; 33 | server.stop(); 34 | super.after(); 35 | } 36 | 37 | @Test 38 | public void node_list() { 39 | // no nodes 40 | exec("node list"); 41 | assertOutContains("no nodes"); 42 | 43 | // 2 nodes 44 | Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 45 | Nodes.addNode(new Node("dn", Node.Type.DATANODE)); 46 | exec("node list"); 
47 | assertOutContains("nodes:"); 48 | assertOutContains("id: nn"); 49 | assertOutContains("id: dn"); 50 | 51 | // single node 52 | exec("node list nn"); 53 | assertOutContains("node:"); 54 | assertOutContains("id: nn"); 55 | assertOutNotContains("id: dn"); 56 | 57 | // expr error 58 | try { exec("node list 0..a"); fail(); } 59 | catch (Cli.Error e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid node")); } 60 | } 61 | 62 | @Test 63 | public void node_add_update() { 64 | // add node 65 | exec("node add nn --type=namenode --cpus=2 --mem=1024"); 66 | assertOutContains("node added"); 67 | 68 | Node nn = Nodes.getNode("nn"); 69 | assertNotNull(nn); 70 | assertEquals(2, nn.cpus, 0.001); 71 | assertEquals(1024, nn.mem); 72 | 73 | // update node 74 | exec("node update nn --core-site-opts=a=1"); 75 | assertOutContains("node updated"); 76 | assertEquals(Strings.parseMap("a=1"), nn.coreSiteOpts); 77 | } 78 | 79 | @Test 80 | public void node_remove() { 81 | Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 82 | Nodes.addNode(new Node("dn", Node.Type.DATANODE)); 83 | 84 | // remove dn 85 | exec("node remove dn"); 86 | assertOutContains("node dn removed"); 87 | assertEquals(1, Nodes.getNodes().size()); 88 | assertNull(Nodes.getNode("dn")); 89 | 90 | // remove nn 91 | exec("node remove nn"); 92 | assertOutContains("node nn removed"); 93 | assertTrue(Nodes.getNodes().isEmpty()); 94 | 95 | // no nodes to remove 96 | try { exec("node remove *"); fail(); } 97 | catch (Cli.Error e) { assertTrue(e.getMessage(), e.getMessage().contains("node not found")); } 98 | } 99 | 100 | @Test 101 | public void node_start_stop() { 102 | // start node 103 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 104 | exec("node start nn --timeout=0"); 105 | assertOutContains("node scheduled to start:"); 106 | assertOutContains("id: nn"); 107 | assertEquals(Node.State.STARTING, nn.state); 108 | 109 | // stop node 110 | exec("node stop nn --timeout=0"); 111 | assertOutContains("node scheduled to stop:"); 112 | assertOutContains("id: nn"); 113 | assertEquals(Node.State.STOPPING, nn.state); 114 | } 115 | 116 | private void exec(String cmd) { 117 | out.reset(); 118 | Cli.handle(new ArrayList<>(Arrays.asList(cmd.split(" ")))); 119 | } 120 | 121 | private void assertOutContains(String message) { 122 | if (!out.toString().contains(message)) 123 | throw new AssertionError("out expected contain \"" + message + "\", actual:\n" + out.toString()); 124 | } 125 | 126 | private void assertOutNotContains(String message) { 127 | if (out.toString().contains(message)) 128 | throw new AssertionError("out not expected contain \"" + message + "\", actual:\n" + out.toString()); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/test/net/elodina/mesos/hdfs/HdfsMesosTestCase.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import net.elodina.mesos.api.Master; 4 | import net.elodina.mesos.api.Task; 5 | import net.elodina.mesos.api.driver.SchedulerDriver; 6 | import net.elodina.mesos.test.MesosTestCase; 7 | import net.elodina.mesos.util.Net; 8 | import org.apache.log4j.BasicConfigurator; 9 | import org.junit.After; 10 | import org.junit.Before; 11 | import org.junit.Ignore; 12 | 13 | import java.io.File; 14 | import java.nio.file.Files; 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | 18 | import static org.junit.Assert.assertTrue; 19 | 20 | @Ignore 21 | public class 
HdfsMesosTestCase extends MesosTestCase { 22 | TestSchedulerDriver schedulerDriver = new TestSchedulerDriver(); 23 | 24 | @Before 25 | public void before() throws Exception { 26 | BasicConfigurator.configure(); 27 | Scheduler.$.initLogging(); 28 | 29 | File storageFile = Files.createTempFile(MesosTestCase.class.getSimpleName(), null).toFile(); 30 | assertTrue(storageFile.delete()); 31 | Nodes.storage = Storage.file(storageFile); 32 | Nodes.reset(); 33 | 34 | Scheduler.Config config = Scheduler.$.config; 35 | config.api = "http://localhost:" + Net.findAvailPort(); 36 | config.jar = new File("hdfs-mesos-0.1.jar"); 37 | config.hadoop = new File("hadoop-1.2.1.tar.gz"); 38 | 39 | Cli.api = config.api; 40 | Scheduler.$.subscribed(schedulerDriver, "id", new Master()); 41 | } 42 | 43 | @After 44 | public void after() throws Exception { 45 | Scheduler.$.disconnected(); 46 | BasicConfigurator.resetConfiguration(); 47 | 48 | Scheduler.Config config = Scheduler.$.config; 49 | config.api = null; 50 | config.jar = null; 51 | 52 | Nodes.storage.clear(); 53 | } 54 | 55 | public static class TestSchedulerDriver implements SchedulerDriver { 56 | public List<String> declinedOffers = new ArrayList<>(); 57 | public List<String> acceptedOffers = new ArrayList<>(); 58 | 59 | public List<Task> launchedTasks = new ArrayList<>(); 60 | public List<String> killedTasks = new ArrayList<>(); 61 | public List<String> reconciledTasks = new ArrayList<>(); 62 | 63 | public boolean stopped; 64 | public List<Message> sentFrameworkMessages = new ArrayList<>(); 65 | 66 | @Override 67 | public void declineOffer(String id) { declinedOffers.add(id); } 68 | 69 | @Override 70 | public void launchTask(String offerId, Task task) { 71 | acceptedOffers.add(offerId); 72 | launchedTasks.add(task); 73 | } 74 | 75 | @Override 76 | public void reconcileTasks(List<String> ids) { 77 | if (ids.isEmpty()) reconciledTasks.add(""); 78 | reconciledTasks.addAll(ids); 79 | } 80 | 81 | @Override 82 | public void killTask(String id) { killedTasks.add(id); } 83 | 84 | @Override 85 | public boolean run() { throw new UnsupportedOperationException(); } 86 | 87 | @Override 88 | public void stop() { stopped = true; } 89 | 90 | public static class Message { 91 | public String executorId; 92 | public String slaveId; 93 | public byte[] data; 94 | 95 | public Message(String executorId, String slaveId, byte[] data) { 96 | this.executorId = executorId; 97 | this.slaveId = slaveId; 98 | this.data = data; 99 | } 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/test/net/elodina/mesos/hdfs/HttpServerTest.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import net.elodina.mesos.util.IO; 4 | import net.elodina.mesos.util.Request; 5 | import org.json.simple.JSONArray; 6 | import org.json.simple.JSONAware; 7 | import org.json.simple.JSONObject; 8 | import org.json.simple.parser.JSONParser; 9 | import org.json.simple.parser.ParseException; 10 | import org.junit.After; 11 | import org.junit.Before; 12 | import org.junit.Test; 13 | 14 | import java.io.File; 15 | import java.io.IOException; 16 | import java.util.Arrays; 17 | import java.util.List; 18 | 19 | import static junit.framework.Assert.assertEquals; 20 | import static junit.framework.TestCase.assertTrue; 21 | import static org.junit.Assert.fail; 22 | 23 | public class HttpServerTest extends HdfsMesosTestCase { 24 | private HttpServer server; 25 | private File dir; 26 | 27 | @Before 28 | public void before() throws Exception { 
29 | super.before(); 30 | 31 | server = new HttpServer(); 32 | server.start(); 33 | 34 | dir = File.createTempFile(HttpServerTest.class.getSimpleName(), null); 35 | assertTrue(dir.delete()); 36 | assertTrue(dir.mkdirs()); 37 | } 38 | 39 | @After 40 | public void after() throws Exception { 41 | server.stop(); 42 | IO.delete(dir); 43 | 44 | super.after(); 45 | } 46 | 47 | @Test 48 | public void download_jar() throws IOException { 49 | Scheduler.$.config.jar = new File(dir, "hdfs-mesos-0.1.jar"); 50 | IO.writeFile(Scheduler.$.config.jar, "jar"); 51 | 52 | byte[] data = download("/jar/hdfs-mesos.jar"); 53 | assertEquals("jar", new String(data)); 54 | } 55 | 56 | @Test 57 | public void download_hadoop() throws IOException { 58 | Scheduler.$.config.hadoop = new File(dir, "hadoop-1.2.1.tar.gz"); 59 | IO.writeFile(Scheduler.$.config.hadoop, "hadoop"); 60 | 61 | byte[] data = download("/hadoop/hadoop.tar.gz"); 62 | assertEquals("hadoop", new String(data)); 63 | } 64 | 65 | @Test 66 | public void handle_health() throws IOException { 67 | byte[] response = download("/health"); 68 | assertEquals("ok\n", new String(response)); 69 | } 70 | 71 | @Test 72 | public void node_list() throws IOException { 73 | // no nodes 74 | JSONArray json = request("/node/list"); 75 | List<Node> nodes = Node.fromJsonArray(json); 76 | assertTrue(nodes.isEmpty()); 77 | 78 | // 2 nodes 79 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 80 | Node dn = Nodes.addNode(new Node("dn", Node.Type.DATANODE)); 81 | 82 | json = request("/node/list"); 83 | nodes = Node.fromJsonArray(json); 84 | assertEquals(Arrays.asList(nn, dn), nodes); 85 | 86 | // single node 87 | json = request("/node/list?node=nn"); 88 | nodes = Node.fromJsonArray(json); 89 | assertEquals(Arrays.asList(nn), nodes); 90 | 91 | // invalid node 92 | try { request("/node/list?node=0..a"); fail(); } 93 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid node")); } 94 | } 95 | 96 | @Test 97 | public void node_add_update() throws IOException { 98 | // add namenode 99 | JSONArray json = request("/node/add?node=nn&type=namenode"); 100 | assertEquals(1, Nodes.getNodes().size()); 101 | 102 | Node nn = Nodes.getNode("nn"); 103 | assertEquals(Node.Type.NAMENODE, nn.type); 104 | assertEquals(Arrays.asList(nn), Node.fromJsonArray(json)); 105 | 106 | // add datanode 107 | json = request("/node/add?node=dn&type=datanode"); 108 | assertEquals(2, Nodes.getNodes().size()); 109 | 110 | Node dn = Nodes.getNode("dn"); 111 | assertEquals(Node.Type.DATANODE, dn.type); 112 | assertEquals(Arrays.asList(dn), Node.fromJsonArray(json)); 113 | 114 | // update nodes 115 | json = request("/node/update?node=*&mem=2048"); 116 | assertEquals(Arrays.asList(nn, dn), Node.fromJsonArray(json)); 117 | 118 | assertEquals(2048, nn.mem); 119 | assertEquals(2048, dn.mem); 120 | } 121 | 122 | @Test 123 | public void node_add_update_node_validation() { 124 | // no node 125 | try { request("/node/add"); fail(); } 126 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node required")); } 127 | 128 | // invalid node 129 | try { request("/node/add?node=0..a"); fail(); } 130 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid node")); } 131 | 132 | // duplicate node 133 | Node dn = Nodes.addNode(new Node("dn", Node.Type.DATANODE)); 134 | try { request("/node/add?node=dn"); fail(); } 135 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("duplicate node")); } 136 | 137 | // node not found 
138 | try { request("/node/update?node=unknown"); fail(); } 139 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node not found")); } 140 | 141 | // node not idle 142 | dn.state = Node.State.STARTING; 143 | try { request("/node/update?node=dn"); fail(); } 144 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node not idle")); } 145 | } 146 | 147 | @Test 148 | public void node_add_update_type_validation() { 149 | // no type 150 | try { request("/node/add?node=a"); fail(); } 151 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("type required")); } 152 | 153 | // invalid type 154 | try { request("/node/add?node=a&type=abc"); fail(); } 155 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid type")); } 156 | 157 | // duplicate namenode 158 | Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 159 | try { request("/node/add?node=nn2&type=namenode"); fail(); } 160 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("duplicate namenode")); } 161 | } 162 | 163 | @Test 164 | public void node_add_update_other_validation() { 165 | Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 166 | 167 | // cpus 168 | try { request("/node/update?node=nn&cpus=invalid"); fail(); } 169 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid cpus")); } 170 | 171 | // mem 172 | try { request("/node/update?node=nn&mem=invalid"); fail(); } 173 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid mem")); } 174 | 175 | // coreSiteOpts 176 | try { request("/node/update?node=nn&coreSiteOpts=invalid"); fail(); } 177 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid coreSiteOpts")); } 178 | 179 | // hdfsSiteOpts 180 | try { request("/node/update?node=nn&hdfsSiteOpts=invalid"); fail(); } 181 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid hdfsSiteOpts")); } 182 | 183 | // failoverDelay 184 | try { request("/node/update?node=nn&failoverDelay=invalid"); fail(); } 185 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid failoverDelay")); } 186 | 187 | // failoverMaxDelay 188 | try { request("/node/update?node=nn&failoverMaxDelay=invalid"); fail(); } 189 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid failoverMaxDelay")); } 190 | 191 | // failoverMaxTries 192 | try { request("/node/update?node=nn&failoverMaxTries=invalid"); fail(); } 193 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid failoverMaxTries")); } 194 | } 195 | 196 | @Test 197 | public void node_start_stop() throws IOException { 198 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 199 | 200 | // schedule start 201 | JSONObject json = request("/node/start?node=nn&timeout=0"); 202 | assertEquals("scheduled", "" + json.get("status")); 203 | assertEquals(Node.State.STARTING, nn.state); 204 | 205 | // schedule stop 206 | json = request("/node/stop?node=nn&timeout=0"); 207 | assertEquals("scheduled", "" + json.get("status")); 208 | assertEquals(Node.State.STOPPING, nn.state); 209 | } 210 | 211 | @Test 212 | public void node_start_stop_validation() { 213 | // node required 214 | try { request("/node/start"); fail(); } 215 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node required")); } 216 | 217 | // invalid node 218 | try { 
request("/node/start?node=0..a"); fail(); } 219 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid node")); } 220 | 221 | // node not found 222 | try { request("/node/start?node=a"); fail(); } 223 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node not found")); } 224 | 225 | // node not idle 226 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 227 | nn.state = Node.State.RUNNING; 228 | 229 | try { request("/node/start?node=nn"); fail(); } 230 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node not idle")); } 231 | 232 | // node idle 233 | nn.state = Node.State.IDLE; 234 | try { request("/node/stop?node=nn"); fail(); } 235 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node idle")); } 236 | 237 | // node external 238 | nn.externalFsUri = "uri"; 239 | try { request("/node/start?node=nn"); fail(); } 240 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node external")); } 241 | 242 | // timeout 243 | nn.externalFsUri = null; 244 | try { request("/node/start?node=nn&timeout=invalid"); fail(); } 245 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid timeout")); } 246 | } 247 | 248 | @Test 249 | public void node_remove() throws IOException { 250 | Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 251 | Nodes.addNode(new Node("dn", Node.Type.DATANODE)); 252 | 253 | JSONArray json = request("/node/remove?node=dn"); 254 | assertEquals(1, Nodes.getNodes().size()); 255 | assertEquals(Arrays.asList("dn"), json); 256 | 257 | json = request("/node/remove?node=nn"); 258 | assertTrue(Nodes.getNodes().isEmpty()); 259 | assertEquals(Arrays.asList("nn"), json); 260 | } 261 | 262 | @Test 263 | public void node_remove_validation() { 264 | // node required 265 | try { request("/node/remove"); fail(); } 266 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node required")); } 267 | 268 | // invalid node 269 | try { request("/node/remove?node=0..a"); fail(); } 270 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("invalid node")); } 271 | 272 | // node not found 273 | try { request("/node/remove?node=a"); fail(); } 274 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node not found")); } 275 | 276 | try { request("/node/remove?node=a*"); fail(); } 277 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node not found")); } 278 | 279 | // node not idle 280 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 281 | nn.state = Node.State.RUNNING; 282 | try { request("/node/remove?node=nn"); fail(); } 283 | catch (IOException e) { assertTrue(e.getMessage(), e.getMessage().contains("node not idle")); } 284 | } 285 | 286 | @SuppressWarnings("unchecked") 287 | public <T extends JSONAware> T request(String uri) throws IOException { 288 | Request.Response response = new Request(Cli.api + "/api" + uri).send(); 289 | if (response.code() != 200) throw new IOException("Error " + response.code() + ": " + response.message()); 290 | 291 | String json = response.text(); 292 | if (json == null) return null; 293 | 294 | try { return (T) new JSONParser().parse(json); } 295 | catch (ParseException e) { throw new IOException(e); } 296 | } 297 | 298 | public byte[] download(String uri) throws IOException { 299 | return new Request(Cli.api + uri).send().body(); 300 | } 301 | } 302 | 
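303 | 
304 | // The tests above exercise the scheduler's embedded HTTP interface end to end:
305 | // artifact downloads (/jar/*, /hadoop/*), the /health probe, and the JSON API
306 | // under /api/node/* (list, add, update, start, stop, remove), including the
307 | // validation error each endpoint reports for bad input.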
-------------------------------------------------------------------------------- /src/test/net/elodina/mesos/hdfs/NodeTest.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import net.elodina.mesos.api.*; 4 | import net.elodina.mesos.util.Constraint; 5 | import net.elodina.mesos.util.Period; 6 | import net.elodina.mesos.util.Range; 7 | import net.elodina.mesos.util.Strings; 8 | import org.junit.Test; 9 | 10 | import java.util.*; 11 | 12 | import static net.elodina.mesos.hdfs.Node.Failover; 13 | import static net.elodina.mesos.hdfs.Node.Stickiness; 14 | import static org.junit.Assert.*; 15 | 16 | public class NodeTest extends HdfsMesosTestCase { 17 | @Test 18 | public void matches() { 19 | Node node = new Node("0", Node.Type.NAMENODE); 20 | node.cpus = 0.5; 21 | node.mem = 500; 22 | 23 | assertEquals("cpus < 0.5", node.matches(new Offer("resources:[cpus:0.1]"))); 24 | assertEquals("mem < 500", node.matches(new Offer("resources:[cpus:0.5; mem:400]"))); 25 | } 26 | 27 | @Test 28 | public void matches_namenode_state() { 29 | Node node = new Node("0", Node.Type.NAMENODE); 30 | node.cpus = 0.5; 31 | node.mem = 500; 32 | 33 | Offer offer = new Offer("id:1, frameworkId:2, slaveId:3, hostname:host, resources:[cpus:0.5; mem:500; ports:0..4]"); 34 | assertNull(node.matches(offer)); 35 | 36 | // no name node 37 | node.type = Node.Type.DATANODE; 38 | assertEquals("no namenode", node.matches(offer)); 39 | 40 | // no running or external namenode 41 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 42 | assertEquals("no running or external namenode", node.matches(offer)); 43 | 44 | // external namenode 45 | nn.externalFsUri = "fs-uri"; 46 | assertNull(node.matches(offer)); 47 | 48 | // running namenode 49 | nn.externalFsUri = null; 50 | nn.initRuntime(offer); 51 | nn.state = Node.State.RUNNING; 52 | assertNull(node.matches(offer)); 53 | } 54 | 55 | @Test 56 | public void matches_constraints() { 57 | class O { 58 | Offer offer(String attributes) { 59 | return new Offer("id:id, frameworkId:fwId, slaveId:slaveId, hostname:host, resources:[cpus:2;mem:2048;ports:0..10]") 60 | .attributes(Attribute.parse(attributes)); 61 | } 62 | 63 | Map<String, Constraint> constraints(String s) { 64 | Map<String, Constraint> result = new HashMap<>(); 65 | Map<String, String> m = Strings.parseMap(s); 66 | for (String name : m.keySet()) result.put(name, new Constraint(m.get(name))); 67 | return result; 68 | } 69 | 70 | Map<String, Collection<String>> otherAttributes(String s) { 71 | Map<String, Collection<String>> result = new HashMap<>(); 72 | 73 | Map<String, String> m = Strings.parseMap(s); 74 | for (String name : m.keySet()) { 75 | if (!result.containsKey(name)) result.put(name, new ArrayList<String>()); 76 | result.get(name).addAll(Arrays.asList(m.get(name).split(";"))); 77 | } 78 | 79 | return result; 80 | } 81 | } 82 | O o = new O(); 83 | 84 | Node node = Nodes.addNode(new Node("mm", Node.Type.NAMENODE)); 85 | 86 | // like 87 | node.constraints = o.constraints("rack=like:1-.*"); 88 | assertEquals(null, node.matches(o.offer("rack=1-1"))); 89 | assertEquals(null, node.matches(o.offer("rack=1-2"))); 90 | assertEquals("rack doesn't match like:1-.*", node.matches(o.offer("rack=2-1"))); 91 | 92 | // groupBy 93 | node.constraints = o.constraints("rack=groupBy"); 94 | assertEquals(null, node.matches(o.offer("rack=1"))); 95 | assertEquals(null, node.matches(o.offer("rack=1"), o.otherAttributes("rack=1"), new Date())); 96 | assertEquals("rack doesn't match groupBy", node.matches(o.offer("rack=2"), o.otherAttributes("rack=1"), new Date())); 97 | } 98 | 99 | @Test 100 | 
public void matches_stickiness() { 101 | Node node = Nodes.addNode(new Node("nn")); 102 | String host0 = "host0"; 103 | String host1 = "host1"; 104 | String resources = "cpus:" + node.cpus + ";mem:" + node.mem + ";ports:0..10"; 105 | 106 | Offer offer0 = new Offer("hostname:" + host0 + ", resources:[" + resources + "]"); 107 | Offer offer1 = new Offer("hostname:" + host1 + ", resources:[" + resources + "]"); 108 | 109 | assertEquals(null, node.matches(offer0, new Date(0))); 110 | assertEquals(null, node.matches(offer1, new Date(0))); 111 | 112 | node.registerStart(host0); 113 | node.registerStop(new Date(0), false); 114 | 115 | assertEquals(null, node.matches(offer0, new Date(0))); 116 | assertEquals("hostname != stickiness hostname", node.matches(offer1, new Date(0))); 117 | assertEquals(null, node.matches(offer1, new Date(node.stickiness.period.ms()))); 118 | } 119 | 120 | @Test 121 | public void reserve() { 122 | Node node = new Node("0"); 123 | node.cpus = 0.5; 124 | node.mem = 400; 125 | 126 | // incomplete reservation 127 | Node.Reservation reservation = node.reserve(new Offer("resources:[cpus:0.3;mem:300]")); 128 | assertEquals(0.3d, reservation.cpus, 0.001); 129 | assertEquals(300, reservation.mem); 130 | assertTrue("" + reservation.ports, reservation.ports.isEmpty()); 131 | 132 | // complete reservation 133 | reservation = node.reserve(new Offer("resources:[cpus:0.7;mem:1000;ports:0..10]")); 134 | assertEquals(node.cpus, reservation.cpus, 0.001); 135 | assertEquals(node.mem, reservation.mem); 136 | assertEquals(2, reservation.ports.size()); 137 | assertEquals(new Integer(0), reservation.ports.get(Node.Port.HTTP)); 138 | assertEquals(new Integer(1), reservation.ports.get(Node.Port.IPC)); 139 | } 140 | 141 | @Test 142 | public void reservePort() { 143 | Node node = new Node("0"); 144 | List<Range> ports = new ArrayList<>(); 145 | ports.add(new Range("0..100")); 146 | 147 | assertEquals(10, node.reservePort(new Range("10..20"), ports)); 148 | assertEquals(Arrays.asList(new Range("0..9"), new Range("11..100")), ports); 149 | 150 | assertEquals(0, node.reservePort(new Range("0..0"), ports)); 151 | assertEquals(Arrays.asList(new Range("1..9"), new Range("11..100")), ports); 152 | 153 | assertEquals(100, node.reservePort(new Range("100..200"), ports)); 154 | assertEquals(Arrays.asList(new Range("1..9"), new Range("11..99")), ports); 155 | 156 | assertEquals(50, node.reservePort(new Range("50..60"), ports)); 157 | assertEquals(Arrays.asList(new Range("1..9"), new Range("11..49"), new Range("51..99")), ports); 158 | } 159 | 160 | @Test 161 | public void initRuntime() { 162 | Node node = Nodes.addNode(new Node("0")); 163 | node.cpus = 0.1; 164 | node.mem = 100; 165 | 166 | Offer offer = new Offer("id:id, frameworkId:fwId, slaveId:slaveId, hostname:host, resources:[cpus:2;mem:1024;ports:0..10], attributes:[a=1,b=2]"); 167 | node.initRuntime(offer); 168 | 169 | assertNotNull(node.runtime); 170 | assertNotNull(node.runtime.taskId); 171 | assertNotNull(node.runtime.executorId); 172 | assertNotNull(node.runtime.fsUri); 173 | 174 | assertEquals(offer.slaveId(), node.runtime.slaveId); 175 | assertEquals(offer.hostname(), node.runtime.hostname); 176 | assertEquals(Strings.parseMap("a=1,b=2"), node.runtime.attributes); 177 | 178 | assertNotNull(node.reservation); 179 | assertEquals(0.1, node.reservation.cpus, 0.001); 180 | assertEquals(100, node.reservation.mem); 181 | } 182 | 183 | @Test 184 | public void initRuntime_fsUri() { 185 | Node node = Nodes.addNode(new Node("0", Node.Type.NAMENODE)); 186 | 187 | 
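// fsUri resolution, exercised step by step below: a namenode advertises an HDFS
// URI on its own offer hostname; a datanode takes the URI from a RUNNING
// namenode's runtime hostname, or uses externalFsUri verbatim, and initRuntime
// fails with "no namenode" when neither is available.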
// name node 188 | Offer offer = new Offer("hostname:master, resources:[ports:0..10]"); 189 | node.initRuntime(offer); 190 | assertTrue(node.runtime.fsUri, node.runtime.fsUri.contains(offer.hostname())); 191 | 192 | // data node, no name node 193 | node.type = Node.Type.DATANODE; 194 | try { node.initRuntime(offer); fail(); } 195 | catch (IllegalStateException e) { assertTrue(e.getMessage(), e.getMessage().contains("no namenode")); } 196 | 197 | // data node, running name node 198 | Node nn = Nodes.addNode(new Node("1", Node.Type.NAMENODE)); 199 | nn.initRuntime(offer); 200 | node.initRuntime(offer); 201 | assertTrue(node.runtime.fsUri, node.runtime.fsUri.contains(nn.runtime.hostname)); 202 | 203 | // data node, external name node 204 | nn.runtime = null; 205 | nn.externalFsUri = "fs-uri"; 206 | node.initRuntime(offer); 207 | assertEquals(nn.externalFsUri, node.runtime.fsUri); 208 | } 209 | 210 | @Test 211 | public void newTask() { 212 | Node node = Nodes.addNode(new Node("0")); 213 | node.initRuntime(new Offer("resources:[ports:0..10]")); 214 | 215 | Task task = node.newTask(); 216 | assertEquals(task.id(), node.runtime.taskId); 217 | assertEquals("hdfs-" + node.id, task.name()); 218 | assertEquals(task.slaveId(), node.runtime.slaveId); 219 | 220 | assertNotNull(task.executor()); 221 | assertEquals("" + node.toJson(), new String(task.data())); 222 | assertEquals(node.reservation.toResources(), task.resources()); 223 | } 224 | 225 | @Test 226 | public void newExecutor() { 227 | Node node = Nodes.addNode(new Node("0")); 228 | node.executorJvmOpts = "-Xmx100m"; 229 | node.initRuntime(new Offer("resources:[ports:0..10]")); 230 | 231 | Task.Executor executor = node.newExecutor(); 232 | assertEquals("hdfs-" + node.id, executor.name()); 233 | assertEquals(node.runtime.executorId, executor.id()); 234 | 235 | // uris 236 | Command command = executor.command(); 237 | assertEquals(2, command.uris().size()); 238 | 239 | String uri = command.uris().get(0).value(); 240 | assertTrue(uri, uri.contains(Scheduler.$.config.jar.getName())); 241 | uri = command.uris().get(1).value(); 242 | assertTrue(uri, uri.contains(Scheduler.$.config.hadoop.getName())); 243 | 244 | // cmd 245 | String cmd = command.value(); 246 | assertTrue(cmd, cmd.contains("java")); 247 | assertTrue(cmd, cmd.contains(node.executorJvmOpts)); 248 | assertTrue(cmd, cmd.contains(Executor.class.getName())); 249 | } 250 | 251 | @Test 252 | public void toJson_fromJson() { 253 | Node node = Nodes.addNode(new Node("node")); 254 | node.type = Node.Type.NAMENODE; 255 | node.state = Node.State.RUNNING; 256 | 257 | node.cpus = 2; 258 | node.mem = 1024; 259 | 260 | node.constraints.put("hostname", new Constraint("like:master")); 261 | node.constraints.put("a", new Constraint("like:1")); 262 | 263 | node.executorJvmOpts = "executor-opts"; 264 | node.hadoopJvmOpts = "hadoop-opts"; 265 | node.coreSiteOpts.put("a", "1"); 266 | node.hdfsSiteOpts.put("b", "2"); 267 | 268 | node.externalFsUri = "external-fs-uri"; 269 | 270 | node.initRuntime(new Offer()); 271 | node.stickiness.registerStart("hostname"); 272 | node.failover.failures = 5; 273 | 274 | Node read = new Node(node.toJson()); 275 | assertEquals(node.id, read.id); 276 | assertEquals(node.type, read.type); 277 | assertEquals(node.state, read.state); 278 | 279 | assertEquals(node.cpus, read.cpus, 0.001); 280 | assertEquals(node.mem, read.mem); 281 | 282 | assertEquals(node.constraints, read.constraints); 283 | 284 | assertEquals(node.executorJvmOpts, read.executorJvmOpts); 285 | 
assertEquals(node.hadoopJvmOpts, read.hadoopJvmOpts); 286 | assertEquals(node.coreSiteOpts, read.coreSiteOpts); 287 | assertEquals(node.hdfsSiteOpts, read.hdfsSiteOpts); 288 | 289 | assertEquals(node.externalFsUri, read.externalFsUri); 290 | 291 | assertEquals(node.stickiness.hostname, read.stickiness.hostname); 292 | assertEquals(node.failover.failures, read.failover.failures); 293 | assertNotNull(read.runtime); 294 | assertNotNull(read.reservation); 295 | } 296 | 297 | // Runtime 298 | @Test 299 | public void Runtime_toJson_fromJson() { 300 | Node.Runtime runtime = new Node.Runtime(); 301 | runtime.slaveId = "slaveId"; 302 | runtime.hostname = "hostname"; 303 | runtime.attributes.putAll(Strings.parseMap("a=1,b=2")); 304 | 305 | runtime.fsUri = "hdfs://localhost:31000"; 306 | runtime.killSent = true; 307 | 308 | Node.Runtime read = new Node.Runtime(runtime.toJson()); 309 | assertEquals(runtime.taskId, read.taskId); 310 | assertEquals(runtime.executorId, read.executorId); 311 | 312 | assertEquals(runtime.slaveId, read.slaveId); 313 | assertEquals(runtime.hostname, read.hostname); 314 | assertEquals(runtime.attributes, read.attributes); 315 | 316 | assertEquals(runtime.fsUri, read.fsUri); 317 | assertEquals(runtime.killSent, read.killSent); 318 | } 319 | 320 | // Reservation 321 | @Test 322 | public void Reservation_toJson_fromJson() { 323 | Node.Reservation reservation = new Node.Reservation(); 324 | reservation.cpus = 0.5; 325 | reservation.mem = 256; 326 | reservation.ports.put(Node.Port.HTTP, 10); 327 | reservation.ports.put(Node.Port.IPC, 20); 328 | 329 | Node.Reservation read = new Node.Reservation(reservation.toJson()); 330 | assertEquals(reservation.cpus, read.cpus, 0.001); 331 | assertEquals(reservation.mem, read.mem); 332 | assertEquals(reservation.ports, read.ports); 333 | } 334 | 335 | @Test 336 | public void Reservation_toResources() { 337 | assertEquals(Resource.parse(""), new Node.Reservation().toResources()); 338 | assertEquals(Resource.parse("cpus:0.5;mem:500;ports:1000..1000"), new Node.Reservation(0.5, 500, Collections.singletonMap("ipc", 1000)).toResources()); 339 | } 340 | 341 | // Stickiness 342 | @Test 343 | public void Stickiness_allowsHostname() { 344 | Stickiness stickiness = new Stickiness(); 345 | assertTrue(stickiness.allowsHostname("host0", new Date(0))); 346 | assertTrue(stickiness.allowsHostname("host1", new Date(0))); 347 | 348 | stickiness.registerStart("host0"); 349 | stickiness.registerStop(new Date(0)); 350 | assertTrue(stickiness.allowsHostname("host0", new Date(0))); 351 | assertFalse(stickiness.allowsHostname("host1", new Date(0))); 352 | assertTrue(stickiness.allowsHostname("host1", new Date(stickiness.period.ms()))); 353 | } 354 | 355 | @Test 356 | public void Stickiness_registerStart_registerStop() { 357 | Stickiness stickiness = new Stickiness(); 358 | assertNull(stickiness.hostname); 359 | assertNull(stickiness.stopTime); 360 | 361 | stickiness.registerStart("host"); 362 | assertEquals("host", stickiness.hostname); 363 | assertNull(stickiness.stopTime); 364 | 365 | stickiness.registerStop(new Date(0)); 366 | assertEquals("host", stickiness.hostname); 367 | assertEquals(new Date(0), stickiness.stopTime); 368 | 369 | stickiness.registerStart("host1"); 370 | assertEquals("host1", stickiness.hostname); 371 | assertNull(stickiness.stopTime); 372 | } 373 | 374 | @Test 375 | public void Stickiness_toJson_fromJson() { 376 | Stickiness stickiness = new Stickiness(); 377 | stickiness.registerStart("localhost"); 378 | stickiness.registerStop(new Date(0)); 
379 | 380 | Stickiness read = new Stickiness(); 381 | read.fromJson(stickiness.toJson()); 382 | 383 | assertEquals(stickiness.period, read.period); 384 | assertEquals(stickiness.hostname, read.hostname); 385 | assertEquals(stickiness.stopTime, read.stopTime); 386 | } 387 | 388 | // Failover 389 | @Test 390 | public void Failover_currentDelay() { 391 | Failover failover = new Failover(new Period("1s"), new Period("5s")); 392 | 393 | failover.failures = 0; 394 | assertEquals(new Period("0s"), failover.currentDelay()); 395 | 396 | failover.failures = 1; 397 | assertEquals(new Period("1s"), failover.currentDelay()); 398 | 399 | failover.failures = 2; 400 | assertEquals(new Period("2s"), failover.currentDelay()); 401 | 402 | failover.failures = 3; 403 | assertEquals(new Period("4s"), failover.currentDelay()); 404 | 405 | failover.failures = 4; 406 | assertEquals(new Period("5s"), failover.currentDelay()); 407 | 408 | failover.failures = 32; 409 | assertEquals(new Period("5s"), failover.currentDelay()); 410 | 411 | failover.failures = 33; 412 | assertEquals(new Period("5s"), failover.currentDelay()); 413 | 414 | failover.failures = 100; 415 | assertEquals(new Period("5s"), failover.currentDelay()); 416 | 417 | // multiplier boundary 418 | failover.maxDelay = new Period(Integer.MAX_VALUE + "s"); 419 | 420 | failover.failures = 30; 421 | assertEquals(new Period((1 << 29) + "s"), failover.currentDelay()); 422 | 423 | failover.failures = 31; 424 | assertEquals(new Period((1 << 30) + "s"), failover.currentDelay()); 425 | 426 | failover.failures = 32; 427 | assertEquals(new Period((1 << 30) + "s"), failover.currentDelay()); 428 | 429 | failover.failures = 100; 430 | assertEquals(new Period((1 << 30) + "s"), failover.currentDelay()); 431 | } 432 | 433 | @Test 434 | public void Failover_delayExpires() { 435 | Failover failover = new Failover(new Period("1s"), new Period("5s")); 436 | assertEquals(new Date(0), failover.delayExpires()); 437 | 438 | failover.registerFailure(new Date(0)); 439 | assertEquals(new Date(1000), failover.delayExpires()); 440 | 441 | failover.failureTime = new Date(1000); 442 | assertEquals(new Date(2000), failover.delayExpires()); 443 | } 444 | 445 | @Test 446 | public void Failover_isWaitingDelay() { 447 | Failover failover = new Failover(new Period("1s"), new Period("5s")); 448 | assertFalse(failover.isWaitingDelay(new Date(0))); 449 | 450 | failover.registerFailure(new Date(0)); 451 | 452 | assertTrue(failover.isWaitingDelay(new Date(0))); 453 | assertTrue(failover.isWaitingDelay(new Date(500))); 454 | assertTrue(failover.isWaitingDelay(new Date(999))); 455 | assertFalse(failover.isWaitingDelay(new Date(1000))); 456 | } 457 | 458 | @Test 459 | public void Failover_isMaxTriesExceeded() { 460 | Failover failover = new Failover(); 461 | 462 | failover.failures = 100; 463 | assertFalse(failover.isMaxTriesExceeded()); 464 | 465 | failover.maxTries = 50; 466 | assertTrue(failover.isMaxTriesExceeded()); 467 | } 468 | 469 | @Test 470 | public void Failover_registerFailure_resetFailures() { 471 | Failover failover = new Failover(); 472 | assertEquals(0, failover.failures); 473 | assertNull(failover.failureTime); 474 | 475 | failover.registerFailure(new Date(1)); 476 | assertEquals(1, failover.failures); 477 | assertEquals(new Date(1), failover.failureTime); 478 | 479 | failover.registerFailure(new Date(2)); 480 | assertEquals(2, failover.failures); 481 | assertEquals(new Date(2), failover.failureTime); 482 | 483 | failover.resetFailures(); 484 | assertEquals(0, failover.failures); 485 
| assertNull(failover.failureTime); 486 | 487 | failover.registerFailure(new Date(0)); 488 | assertEquals(1, failover.failures); 489 | } 490 | 491 | @Test 492 | public void Failover_toJson_fromJson() { 493 | Failover failover = new Failover(new Period("1s"), new Period("5s")); 494 | failover.maxTries = 10; 495 | failover.registerFailure(new Date(0)); 496 | 497 | Failover read = new Failover(failover.toJson()); 498 | assertEquals(failover.delay, read.delay); 499 | assertEquals(failover.maxDelay, read.maxDelay); 500 | assertEquals(failover.maxTries, read.maxTries); 501 | 502 | assertEquals(failover.failures, read.failures); 503 | assertEquals(failover.failureTime, read.failureTime); 504 | } 505 | } 506 | -------------------------------------------------------------------------------- /src/test/net/elodina/mesos/hdfs/NodesTest.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import org.json.simple.JSONObject; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | import java.util.Arrays; 8 | 9 | import static org.junit.Assert.*; 10 | 11 | public class NodesTest { 12 | @Before 13 | public void before() { 14 | Nodes.reset(); 15 | } 16 | 17 | @Test 18 | public void getNodes() { 19 | Node n0 = Nodes.addNode(new Node("n0", Node.Type.NAMENODE)); 20 | Node n1 = Nodes.addNode(new Node("n1", Node.Type.DATANODE)); 21 | Node n2 = Nodes.addNode(new Node("n2", Node.Type.DATANODE)); 22 | assertEquals(Arrays.asList(n0, n1, n2), Nodes.getNodes()); 23 | } 24 | 25 | @Test 26 | public void getNodes_by_type() { 27 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 28 | Node dn0 = Nodes.addNode(new Node("dn0", Node.Type.DATANODE)); 29 | Node dn1 = Nodes.addNode(new Node("dn1", Node.Type.DATANODE)); 30 | 31 | assertEquals(Arrays.asList(nn), Nodes.getNodes(Node.Type.NAMENODE)); 32 | assertEquals(Arrays.asList(dn0, dn1), Nodes.getNodes(Node.Type.DATANODE)); 33 | } 34 | 35 | @Test 36 | public void getNodes_by_state() { 37 | Node n0 = Nodes.addNode(new Node("n0")); 38 | Node n1 = Nodes.addNode(new Node("n1", Node.Type.DATANODE)); 39 | Node n2 = Nodes.addNode(new Node("n2", Node.Type.DATANODE)); 40 | 41 | n1.state = Node.State.RUNNING; 42 | n2.state = Node.State.RUNNING; 43 | 44 | assertEquals(Arrays.asList(n0), Nodes.getNodes(Node.State.IDLE)); 45 | assertEquals(Arrays.asList(n1, n2), Nodes.getNodes(Node.State.RUNNING)); 46 | } 47 | 48 | @Test 49 | public void getNode() { 50 | assertNull(Nodes.getNode("n0")); 51 | Node n0 = Nodes.addNode(new Node("n0")); 52 | assertSame(n0, Nodes.getNode("n0")); 53 | } 54 | 55 | @Test 56 | public void expandExpr() { 57 | Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 58 | Nodes.addNode(new Node("dn0", Node.Type.DATANODE)); 59 | Nodes.addNode(new Node("dn1", Node.Type.DATANODE)); 60 | 61 | // id list 62 | assertEquals(Arrays.asList("nn", "dn2"), Nodes.expandExpr("nn,dn2")); 63 | 64 | // wildcard 65 | assertEquals(Arrays.asList("dn0", "dn1"), Nodes.expandExpr("dn*")); 66 | assertEquals(Arrays.asList("nn", "dn0", "dn1"), Nodes.expandExpr("*")); 67 | 68 | // range 69 | assertEquals(Arrays.asList("1", "2", "3"), Nodes.expandExpr("1..3")); 70 | assertEquals(Arrays.asList("dn1", "dn2", "dn3"), Nodes.expandExpr("dn1..3")); 71 | } 72 | 73 | @Test 74 | public void addNode() { 75 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 76 | assertEquals(Arrays.asList(nn), Nodes.getNodes()); 77 | 78 | // duplicate id 79 | try { Nodes.addNode(new Node("nn", Node.Type.DATANODE)); fail(); 
} 80 | catch (IllegalArgumentException e) { assertTrue(e.getMessage(), e.getMessage().contains("duplicate")); } 81 | 82 | // second namenode 83 | try { Nodes.addNode(new Node("nn1", Node.Type.NAMENODE)); fail(); } 84 | catch (IllegalArgumentException e) { assertTrue(e.getMessage(), e.getMessage().contains("second")); } 85 | } 86 | 87 | @Test 88 | public void removeNode() { 89 | Node n0 = Nodes.addNode(new Node("n0", Node.Type.NAMENODE)); 90 | Node n1 = Nodes.addNode(new Node("n1", Node.Type.DATANODE)); 91 | Node n2 = Nodes.addNode(new Node("n2", Node.Type.DATANODE)); 92 | assertEquals(Arrays.asList(n0, n1, n2), Nodes.getNodes()); 93 | 94 | Nodes.removeNode(n1); 95 | assertEquals(Arrays.asList(n0, n2), Nodes.getNodes()); 96 | 97 | Nodes.removeNode(n1); 98 | } 99 | 100 | @Test 101 | public void toJson_fromJson() { 102 | Nodes.frameworkId = "id"; 103 | Node n0 = Nodes.addNode(new Node("n0")); 104 | Node n1 = Nodes.addNode(new Node("n1", Node.Type.DATANODE)); 105 | 106 | JSONObject json = Nodes.toJson(); 107 | Nodes.fromJson(json); 108 | 109 | assertEquals("id", Nodes.frameworkId); 110 | assertEquals(Arrays.asList(n0, n1), Nodes.getNodes()); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/test/net/elodina/mesos/hdfs/SchedulerTest.java: -------------------------------------------------------------------------------- 1 | package net.elodina.mesos.hdfs; 2 | 3 | import net.elodina.mesos.api.Master; 4 | import net.elodina.mesos.api.Offer; 5 | import net.elodina.mesos.api.Task; 6 | import net.elodina.mesos.util.Period; 7 | import net.elodina.mesos.util.Strings; 8 | import org.junit.Test; 9 | 10 | import java.util.Arrays; 11 | import java.util.Collection; 12 | import java.util.Date; 13 | import java.util.Map; 14 | 15 | import static org.junit.Assert.*; 16 | 17 | public class SchedulerTest extends HdfsMesosTestCase { 18 | @Test 19 | public void onTaskStarted() { 20 | Task.Status status = new Task.Status("state:running"); 21 | Node node = Nodes.addNode(new Node("0")); 22 | 23 | // no node 24 | Scheduler.$.onTaskStarted(null, status); 25 | assertEquals(1, schedulerDriver.killedTasks.size()); 26 | schedulerDriver.killedTasks.clear(); 27 | 28 | // unexpected states 29 | for (Node.State state : Arrays.asList(Node.State.IDLE, Node.State.STOPPING)) { 30 | node.state = state; 31 | node.initRuntime(new Offer("resources:[ports:0..10]")); 32 | 33 | Scheduler.$.onTaskStarted(node, status); 34 | assertEquals(state, node.state); 35 | 36 | assertEquals(1, schedulerDriver.killedTasks.size()); 37 | schedulerDriver.killedTasks.clear(); 38 | } 39 | 40 | // expected states 41 | for (Node.State state : Arrays.asList(Node.State.STARTING, Node.State.RUNNING, Node.State.RECONCILING)) { 42 | node.state = state; 43 | node.initRuntime(new Offer("resources:[ports:0..10]")); 44 | 45 | Scheduler.$.onTaskStarted(node, status); 46 | assertEquals(Node.State.RUNNING, node.state); 47 | assertEquals(0, schedulerDriver.killedTasks.size()); 48 | } 49 | } 50 | 51 | @Test 52 | public void onTaskStopped() { 53 | Node node = Nodes.addNode(new Node("0")); 54 | Task.Status status = new Task.Status("state:finished"); 55 | 56 | // no node 57 | Scheduler.$.onTaskStopped(null, status); 58 | 59 | // idle 60 | Scheduler.$.onTaskStopped(node, status); 61 | assertEquals(Node.State.IDLE, node.state); 62 | 63 | // expected states 64 | for (Node.State state : Arrays.asList(Node.State.STARTING, Node.State.RUNNING, Node.State.STOPPING, Node.State.RECONCILING)) { 65 | node.state = state; 66 | 
node.initRuntime(new Offer("resources:[ports:0..10]")); 67 | 68 | Scheduler.$.onTaskStopped(node, status); 69 | assertEquals(state == Node.State.STOPPING ? Node.State.IDLE : Node.State.STARTING, node.state); 70 | assertNull(node.runtime); 71 | assertNull(node.reservation); 72 | } 73 | } 74 | 75 | @Test 76 | public void acceptOffer() { 77 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 78 | nn.state = Node.State.RECONCILING; 79 | 80 | // reconciling 81 | assertEquals("reconciling", Scheduler.$.acceptOffer(new Offer())); 82 | 83 | // nothing to start 84 | nn.state = Node.State.IDLE; 85 | assertEquals("nothing to start", Scheduler.$.acceptOffer(new Offer())); 86 | 87 | // low resources 88 | nn.state = Node.State.STARTING; 89 | nn.cpus = 2; 90 | assertEquals("node nn: cpus < 2.0", Scheduler.$.acceptOffer(new Offer("resources:[cpus:0.1]"))); 91 | 92 | // offer accepted 93 | assertNull(Scheduler.$.acceptOffer(new Offer("resources:[cpus:2;mem:2048;ports:0..10]"))); 94 | assertNotNull(nn.runtime); 95 | assertEquals(1, schedulerDriver.launchedTasks.size()); 96 | } 97 | 98 | @Test 99 | public void launchTask() { 100 | Node node = Nodes.addNode(new Node("nn")); 101 | node.state = Node.State.STARTING; 102 | 103 | Scheduler.$.launchTask(node, new Offer("resources:[ports:0..10]")); 104 | assertEquals(1, schedulerDriver.launchedTasks.size()); 105 | 106 | assertEquals(Node.State.STARTING, node.state); 107 | assertNotNull(node.runtime); 108 | assertNotNull(node.reservation); 109 | } 110 | 111 | @Test 112 | public void otherAttributes() { 113 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 114 | nn.initRuntime(new Offer("hostname:nn, resources:[ports:0..10]")); 115 | nn.runtime.attributes = Strings.parseMap("a=1,b=2"); 116 | 117 | Node dn = Nodes.addNode(new Node("dn", Node.Type.DATANODE)); 118 | dn.initRuntime(new Offer("hostname:dn, resources:[ports:0..10]")); 119 | dn.runtime.attributes = Strings.parseMap("a=3,b=4"); 120 | 121 | Map<String, Collection<String>> attrs = Scheduler.$.otherAttributes(); 122 | assertEquals(3, attrs.size()); 123 | assertEquals(Arrays.asList("nn", "dn"), attrs.get("hostname")); 124 | assertEquals(Arrays.asList("1", "3"), attrs.get("a")); 125 | assertEquals(Arrays.asList("2", "4"), attrs.get("b")); 126 | } 127 | 128 | @Test 129 | public void checkMesosVersion() { 130 | // no version 131 | Scheduler.$.checkMesosVersion(new Master()); 132 | assertTrue(schedulerDriver.stopped); 133 | 134 | // unsupported version 135 | schedulerDriver.stopped = false; 136 | Scheduler.$.checkMesosVersion(new Master("version:0.22.0")); 137 | assertTrue(schedulerDriver.stopped); 138 | 139 | // supported version 140 | schedulerDriver.stopped = false; 141 | Scheduler.$.checkMesosVersion(new Master("version:0.23.0")); 142 | assertFalse(schedulerDriver.stopped); 143 | } 144 | 145 | // Reconciler 146 | @Test 147 | public void Reconciler_isActive() { 148 | // not reconciling 149 | Scheduler.Reconciler reconciler = new Scheduler.Reconciler(); 150 | assertFalse(reconciler.isActive()); 151 | 152 | // reconciling 153 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 154 | nn.state = Node.State.RECONCILING; 155 | assertTrue(reconciler.isActive()); 156 | } 157 | 158 | @Test 159 | public void Reconciler_start() { 160 | Date now = new Date(); 161 | schedulerDriver.reconciledTasks.clear(); 162 | 163 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 164 | nn.initRuntime(new Offer("resources:[ports:0..10]")); 165 | 166 | Node dn = Nodes.addNode(new Node("dn", Node.Type.DATANODE));
167 | dn.initRuntime(new Offer("resources:[ports:0..10]")); 168 | 169 | // start 170 | Scheduler.Reconciler reconciler = new Scheduler.Reconciler(); 171 | reconciler.start(schedulerDriver, now); 172 | 173 | assertEquals(1, reconciler.getTries()); 174 | assertEquals(now, reconciler.getLastTry()); 175 | 176 | assertEquals(1, schedulerDriver.reconciledTasks.size()); 177 | assertEquals("", schedulerDriver.reconciledTasks.get(0)); 178 | 179 | assertEquals(Node.State.RECONCILING, nn.state); 180 | assertEquals(Node.State.RECONCILING, dn.state); 181 | } 182 | 183 | @Test 184 | public void Reconciler_proceed() { 185 | Date now = new Date(); 186 | 187 | Node nn = Nodes.addNode(new Node("nn", Node.Type.NAMENODE)); 188 | nn.initRuntime(new Offer("resources:[ports:0..10]")); 189 | nn.state = Node.State.RECONCILING; 190 | 191 | Node dn = Nodes.addNode(new Node("dn", Node.Type.DATANODE)); 192 | dn.initRuntime(new Offer("resources:[ports:0..10]")); 193 | dn.state = Node.State.RECONCILING; 194 | 195 | Scheduler.Reconciler reconciler = new Scheduler.Reconciler(new Period("0"), 2); 196 | 197 | // !started 198 | schedulerDriver.reconciledTasks.clear(); 199 | reconciler.proceed(schedulerDriver, now); 200 | 201 | assertEquals(0, reconciler.getTries()); 202 | assertNull(reconciler.getLastTry()); 203 | 204 | // start & proceed 2/2 205 | reconciler.start(schedulerDriver, now); 206 | 207 | schedulerDriver.reconciledTasks.clear(); 208 | reconciler.proceed(schedulerDriver, now); 209 | 210 | assertEquals(2, reconciler.getTries()); 211 | assertEquals(now, reconciler.getLastTry()); 212 | 213 | assertEquals(2, schedulerDriver.reconciledTasks.size()); 214 | assertEquals(nn.runtime.taskId, schedulerDriver.reconciledTasks.get(0)); 215 | assertEquals(dn.runtime.taskId, schedulerDriver.reconciledTasks.get(1)); 216 | 217 | // proceed 3/2 - exceeds maxTries 218 | schedulerDriver.reconciledTasks.clear(); 219 | reconciler.proceed(schedulerDriver, now); 220 | 221 | assertEquals(0, reconciler.getTries()); 222 | assertNull(reconciler.getLastTry()); 223 | 224 | assertEquals(Node.State.STARTING, nn.state); 225 | assertNull(nn.runtime); 226 | 227 | assertEquals(Node.State.STARTING, dn.state); 228 | assertNull(dn.runtime); 229 | 230 | assertEquals(0, schedulerDriver.reconciledTasks.size()); 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /vagrant/README.md: -------------------------------------------------------------------------------- 1 | Mesos in Vagrant 2 | ================ 3 | 4 | - [Intro](#intro) 5 | - [General Info](#general-info) 6 | - [Host Names](#host-names) 7 | - [Startup](#startup) 8 | - [Configuration](#configuration) 9 | - [Logs](#logs) 10 | 11 | Intro 12 | ----- 13 | This project provides a Vagrant environment for running a Mesos cluster. 14 | 15 | You should be familiar with: 16 | - Vagrant - https://www.vagrantup.com/docs/getting-started/ 17 | - Mesos - http://mesos.apache.org/documentation/latest/ 18 | 19 | General Info 20 | ------------ 21 | The Vagrantfile creates a Mesos cluster with the following nodes: 22 | - master; 23 | - slave0..slave(N-1) (N is specified in the Vagrantfile); 24 | 25 | The master node provides a web UI listening on http://master:5050. 26 | Both master and slave nodes run a Mesos slave daemon. 27 | 28 | The master node has the Marathon scheduler pre-installed. 29 | Slave nodes may have Docker pre-installed (uncomment `install_docker` in init.sh; see the sketch below).
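For example, Docker provisioning could be enabled before `vagrant up` with a one-liner along these lines (a sketch; it assumes you run it from the repo root and that the `install_docker` call is still the commented-out last line of `init.sh`):

```bash
# enable the install_docker step in the provisioning script
sed -i 's/^#install_docker/install_docker/' vagrant/init.sh
```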
30 | 31 | The host's public key, placed in the `vagrant/.vagrant` dir, will be 32 | copied to `authorized_keys`, so direct access like `ssh vagrant@master|slaveX` 33 | should work. 34 | 35 | Node SSH keys are pre-generated and added to each node's `authorized_keys`, 36 | so internode `ssh` works without a password. 37 | 38 | For a general Mesos overview please refer to 39 | http://mesos.apache.org/documentation/latest/mesos-architecture/ 40 | 41 | Host Names 42 | ---------- 43 | During the first run the `Vagrantfile` creates a `hosts` file which 44 | contains host names for the cluster nodes. It is recommended 45 | to append the content of its "cluster nodes" section to `/etc/hosts` 46 | (or another OS-specific location) of the hosting OS, so that 47 | master and slaves can be referred to by name. 48 | 49 | Startup 50 | ------- 51 | Mesos master and slave daemons are started automatically. 52 | 53 | Each slave node runs the 'mesos-slave' daemon, while the master runs both 54 | the 'mesos-master' and 'mesos-slave' daemons. 55 | 56 | Daemons can be controlled using: 57 | `/etc/init.d/mesos-{master|slave} {start|stop|status|restart}` 58 | 59 | Configuration 60 | ------------- 61 | Configuration is read from the following locations: 62 | - `/etc/mesos`, `/etc/mesos-{master|slave}` 63 | for general or master|slave-specific CLI options; 64 | - `/etc/default/mesos`, `/etc/default/mesos-{master|slave}` 65 | for general or master|slave-specific environment vars. 66 | 67 | Please refer to the CLI help of the 'mesos-master|slave' daemons and to `/usr/bin/mesos-init-wrapper` 68 | for details. 69 | 70 | Logs 71 | ---- 72 | Logs are written to `/var/log/mesos/mesos-{master|slave}.*` 73 | 74 | -------------------------------------------------------------------------------- /vagrant/Vagrantfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # -*- mode: ruby -*- 16 | # vi: set ft=ruby : 17 | 18 | SLAVES=1 19 | NET_PREFIX="192.168.3."
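# SLAVES sets how many slave VMs are created; NET_PREFIX is the private
# network prefix: the hosting machine gets .1, the master .5, and slave<i>
# gets .<6 + i> (see the NODES map and the hosts-file generation below).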
20 | HOSTNAME=`hostname`[0..-2] 21 | 22 | NODES={"master" => NET_PREFIX + "5"} 23 | (0..SLAVES-1).each do |i| NODES["slave#{i}"] = NET_PREFIX + (6 + i).to_s end 24 | 25 | # create hosts 26 | File.open('.vagrant/hosts', 'w') do |file| 27 | file.write("127.0.0.1\tlocalhost\n") 28 | file.write(NET_PREFIX + "1\t" + HOSTNAME + "\n") 29 | file.write("\n# cluster nodes\n") 30 | NODES.each do |name, ip| file.write("#{ip}\t#{name}\n") end 31 | end 32 | 33 | # create keys 34 | NODES.each do |name, ip| 35 | %x`rm -f .vagrant/#{name}_key*` 36 | %x`ssh-keygen -t rsa -P '' -f .vagrant/#{name}_key -C vagrant@#{name}` 37 | end 38 | 39 | Vagrant.configure(2) do |config| 40 | config.vm.box = "ubuntu/trusty64" 41 | config.vm.synced_folder "../", "/vagrant" 42 | 43 | config.vm.define "master" do |master| 44 | master.vm.provider "virtualbox" do |v| 45 | v.memory = 1524 46 | end 47 | 48 | master.vm.hostname = "master" 49 | master.vm.network :private_network, ip: NODES["master"] 50 | master.vm.provision "shell", path: "init.sh", args: "master" 51 | end 52 | 53 | (0..SLAVES-1).each do |i| 54 | config.vm.define "slave#{i}" do |slave| 55 | slave.vm.provider "virtualbox" do |v| 56 | v.memory = 1024 57 | end 58 | 59 | slave.vm.hostname = "slave#{i}" 60 | slave.vm.network :private_network, ip: NODES[slave.vm.hostname] 61 | slave.vm.provision "shell", path: "init.sh", args: "slave" 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /vagrant/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one or more 4 | # contributor license agreements. See the NOTICE file distributed with 5 | # this work for additional information regarding copyright ownership. 6 | # The ASF licenses this file to You under the Apache License, Version 2.0 7 | # (the "License"); you may not use this file except in compliance with 8 | # the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
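# Provisions a single VM as a Mesos master or slave. The Vagrantfile invokes
# this script with "master" or "slave" as its only argument (validated at the
# bottom); each function below installs and configures one component.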
17 | 18 | install_ssh_keys() { 19 | # own key 20 | cp .vagrant/`hostname`_key.pub /home/vagrant/.ssh/id_rsa.pub 21 | cp .vagrant/`hostname`_key /home/vagrant/.ssh/id_rsa 22 | chown vagrant:vagrant /home/vagrant/.ssh/id_rsa* 23 | 24 | # other hosts keys 25 | cat .vagrant/*_key.pub >> /home/vagrant/.ssh/authorized_keys 26 | } 27 | 28 | install_mesos() { 29 | mode=$1 # master | slave 30 | apt-get -qy install mesos=0.28.0* 31 | 32 | echo "zk://master:2181/mesos" > /etc/mesos/zk 33 | echo '10mins' > /etc/mesos-slave/executor_registration_timeout 34 | echo 'cpus:1;mem:2500;ports:[5000-32000]' > /etc/mesos-slave/resources 35 | 36 | ip=$(cat /etc/hosts | grep `hostname` | grep -E -o "([0-9]{1,3}[\.]){3}[0-9]{1,3}") 37 | echo $ip > "/etc/mesos-$mode/ip" 38 | 39 | if [ $mode == "master" ]; then 40 | ln -s /lib/init/upstart-job /etc/init.d/mesos-master 41 | service mesos-master start 42 | else 43 | apt-get -qy remove zookeeper 44 | fi 45 | 46 | ln -s /lib/init/upstart-job /etc/init.d/mesos-slave 47 | service mesos-slave start 48 | } 49 | 50 | install_marathon() { 51 | apt-get install -qy marathon=0.10.0* 52 | service marathon start 53 | } 54 | 55 | install_docker() { 56 | apt-get install -qy lxc-docker 57 | echo 'docker,mesos' > /etc/mesos-slave/containerizers 58 | service mesos-slave restart 59 | } 60 | 61 | set_java_home() { 62 | # JAVA_HOME 63 | JAVA_BIN_DIR=$(dirname `readlink -f /etc/alternatives/java`) 64 | JAVA_HOME=$(readlink -f $JAVA_BIN_DIR/../../) 65 | echo "export JAVA_HOME=$JAVA_HOME" >> /home/vagrant/.profile 66 | echo 'export PATH=$JAVA_HOME/bin:$PATH' >> /home/vagrant/.profile 67 | } 68 | 69 | install_hadoop() { 70 | version=$1 71 | 72 | pushd /opt 73 | echo "Downloading hadoop $version ..." 74 | wget -q http://archive.apache.org/dist/hadoop/common/hadoop-$version/hadoop-$version.tar.gz 75 | tar -xf hadoop*.tar.gz 76 | rm hadoop*.tar.gz 77 | 78 | HADOOP_HOME=/opt/$(echo hadoop*) 79 | echo "export HADOOP_PREFIX=$HADOOP_HOME" >> /home/vagrant/.profile 80 | echo 'export PATH=$HADOOP_PREFIX/bin:$PATH' >> /home/vagrant/.profile 81 | 82 | popd 83 | } 84 | 85 | if [[ $1 != "master" && $1 != "slave" ]]; then 86 | echo "Usage: $0 master|slave" 87 | exit 1 88 | fi 89 | mode=$1 90 | 91 | cd /vagrant/vagrant 92 | 93 | # name resolution 94 | cp .vagrant/hosts /etc/hosts 95 | 96 | install_ssh_keys 97 | 98 | # disable ipv6 99 | echo -e "\nnet.ipv6.conf.all.disable_ipv6 = 1\n" >> /etc/sysctl.conf 100 | sysctl -p 101 | 102 | # use apt-proxy if present 103 | if [ -f ".vagrant/apt-proxy" ]; then 104 | apt_proxy=$(cat ".vagrant/apt-proxy") 105 | echo "Using apt-proxy: $apt_proxy"; 106 | echo "Acquire::http::Proxy \"$apt_proxy\";" > /etc/apt/apt.conf.d/90-apt-proxy.conf 107 | fi 108 | 109 | # add mesosphere repo 110 | apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E56151BF 111 | DISTRO=$(lsb_release -is | tr '[:upper:]' '[:lower:]') 112 | CODENAME=$(lsb_release -cs) 113 | echo "deb http://repos.mesosphere.io/${DISTRO} ${CODENAME} main" | tee /etc/apt/sources.list.d/mesosphere.list 114 | 115 | # add docker repo 116 | apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 36A1D7869245C8950F966E92D8576A8BA88D21E9 117 | echo "deb http://get.docker.com/ubuntu docker main" > /etc/apt/sources.list.d/docker.list 118 | 119 | # add hadoop repo 120 | add-apt-repository -y ppa:hadoop-ubuntu/stable 121 | 122 | apt-get -qy update 123 | 124 | # install deps 125 | apt-get install -qy vim zip mc curl wget openjdk-7-jre scala git 126 | set_java_home 127 | 128 | install_mesos $mode 129 | if [ $mode 
== "master" ]; then 130 | install_hadoop "2.7.2" 131 | install_marathon 132 | fi 133 | #install_docker 134 | --------------------------------------------------------------------------------