├── .gitignore ├── README.md ├── mapreduce.jpg ├── pom.xml └── src ├── main └── java │ └── lcy │ └── tinympi4j │ ├── common │ └── SplitableTask.java │ ├── demo │ ├── GrepSplitedtask.java │ └── PrimeSplitedtask.java │ └── master │ ├── BigTask.java │ └── TomcatTool.java └── test └── java └── TestMaster.java /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | /.classpath 3 | /.project 4 | /.settings 5 | /src/test/ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tinympi4j-master 2 | A micro Java framework for offline distributed computation — __for fun only; DO NOT use it in a production environment!__ 3 | 微型java分布式离线计算框架 4 | 5 | ## 原理 6 | `tinympi4j-master`创建任务并提交到`tinympi4j-slave`执行, 执行完毕后把结果汇总到`tinympi4j-master` 7 | `tinympi4j-slave`可动态加载执行class文件,如需增加新功能,只需在`tinympi4j-master`端新增任务类,而无需修改`tinympi4j-slave`端代码 8 | 9 | ![](https://raw.githubusercontent.com/binaryer/tinympi4j-master/master/mapreduce.jpg) 10 | 11 | ## 特性 12 | + 简单直观, 没有任何学习难度 13 | + 灵活易于扩展 14 | + slave支持多个任务并发/并行执行 15 | + 使用HTTP协议通信 16 | + 场景: 找素数/grep/wordcount/超大文件或大量小文件处理 17 | 18 | ## 不足 19 | + 只支持Java基本数据类型 20 | + 每个subtask类型只支持单class, 不支持内部类,继承,接口 21 | + 没有进度监控,健康监控功能,也无容错设计 22 | + 不支持重新加载类,只能新建类而不能修改slave已经加载过的类 23 | 24 | 25 | ## 使用流程 26 | 1. 在所有计算节点启动 [tinympi4j-slave](https://github.com/binaryer/tinympi4j-slave) 27 | `java -jar tinympi4j-slave-0.2.jar {port}` 28 | 29 | 2. (在tinympi4j-master端) 编写任务类, 实现`SplitableTask`接口 30 | 31 | 3. (在tinympi4j-master端) 参考下面代码,把任务提交到计算节点执行 32 | 33 | 4.
(在tinympi4j-master端) 等待所有计算节点执行完毕,获取结果 34 | 35 | __注意java class版本: 如master上java7编译的class,slave上的java版本要>=7__ 36 | 37 | ## 例子 38 | #### 分布式计算10000以内的素数 39 | 40 | ```java 41 | 42 | public static void main(String[] args) { 43 | 44 | //启动master(也就是本机)上的tomcat, 任务完成后slave会回调这个地址 45 | final int masterport = 8086; 46 | final String masterurl = "http://192.168.1.100:" + masterport; 47 | TomcatTool.startMasterTomcat(masterport); 48 | 49 | //创建任务 50 | final BigTask bigtask = BigTask.create(masterurl); 51 | 52 | //添加任务到两台计算节点, 请确保计算节点上的 tinympi4j-slave 已启动 53 | //关于计算节点: https://github.com/binaryer/tinympi4j-slave 54 | bigtask.addTask2Slave("http://192.168.1.101:1234", PrimeSplitedtask.class, new Integer[] { 2, 5000 }); 55 | bigtask.addTask2Slave("http://192.168.1.102:1234", PrimeSplitedtask.class, new Integer[] { 5001, 10000 }); 56 | 57 | //等待所有节点执行完毕 58 | final Collection resultset = bigtask.executeAndWait(); 59 | 60 | //打印结果 61 | for (int n : resultset){ 62 | //System.out.println(n); 63 | } 64 | } 65 | 66 | ``` 67 | 68 | 69 | ```java 70 | 71 | //创建SplitableTask的实现类 72 | public class PrimeSplitedtask implements SplitableTask { 73 | 74 | 75 | @Override 76 | public Serializable execute(Serializable[] params) { 77 | 78 | final int fromnumber = (Integer) params[0]; 79 | final int tonumber = (Integer) params[1]; 80 | final Set resultset = new LinkedHashSet(); 81 | 82 | for (int i = fromnumber; i <= tonumber; i++) { 83 | if (isprime(i)) 84 | resultset.add(i); 85 | } 86 | return (Serializable) resultset; 87 | } 88 | 89 | 90 | //判断是否为素数 91 | private boolean isprime(int number) { 92 | int n = 2; 93 | while (true) { 94 | if (number % n == 0 && number!=n) 95 | return false; 96 | n++; 97 | if (n > Math.sqrt(number)) 98 | return true; 99 | } 100 | } 101 | 102 | } 103 | 104 | ``` 105 | 106 | ## 后续完善 107 | + 子任务进度查询 108 | + slave端更多的设置选项: 如线程池大小 109 | + 单个子任务完成异步回调 110 | + 总任务完成异步回调 111 | + 暂停/继续/取消执行中的任务 112 | + 支持所有数据类型 支持内部类 113 | + 支持重新加载类 114 | + 支持压缩传输 115 | + 支持未完成的任务回传已完成结果 116 | 
+ 支持子节点故障转移 117 | 118 | 119 | ## Author 120 | 林春宇@深圳 121 | chunyu_lin@163.com 122 | -------------------------------------------------------------------------------- /mapreduce.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binaryer/tinympi4j-master/f4ec95c074e84895b68e4e61ca36aa5dac08e841/mapreduce.jpg -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | UTF-8 7 | 8.0.42 8 | 9 | 10 | lcy 11 | tinympi4j-master 12 | 0.2 13 | jar 14 | 15 | tinympi4j-master 16 | http://maven.apache.org 17 | 18 | 19 | 20 | junit 21 | junit 22 | 3.8.1 23 | test 24 | 25 | 26 | 27 | 28 | org.apache.commons 29 | commons-lang3 30 | 3.5 31 | 32 | 33 | 34 | 35 | org.jodd 36 | jodd-http 37 | 3.8.1 38 | 39 | 40 | 41 | 42 | org.apache.tomcat.embed 43 | tomcat-embed-core 44 | ${tomcatversion} 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | com.google.code.gson 57 | gson 58 | 2.8.0 59 | 60 | 61 | 62 | 63 | 64 | commons-io 65 | commons-io 66 | 2.5 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | org.apache.maven.plugins 78 | maven-compiler-plugin 79 | 3.6.1 80 | 81 | 1.8 82 | 1.8 83 | UTF-8 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/main/java/lcy/tinympi4j/common/SplitableTask.java: -------------------------------------------------------------------------------- 1 | package lcy.tinympi4j.common; 2 | 3 | import java.io.Serializable; 4 | 5 | /** A sub-task that the master hands to a slave node for execution. Implementations are passed to BigTask.addTask2Slave as a Class object together with their parameter array. NOTE(review): the project README states that an implementation must be a single self-contained class (no inner classes) — confirm against the slave-side loader. */ public interface SplitableTask { 6 | 7 | 8 | /** Runs this sub-task. @param params the arguments supplied by the master for this sub-task (BigTask serializes this array into the request body) @return the sub-task's result; it must be Serializable — presumably so it can be sent back to the master, TODO confirm on the slave side */ Serializable execute(Serializable[] params); 9 | 10 | 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/lcy/tinympi4j/demo/GrepSplitedtask.java: -------------------------------------------------------------------------------- 1 | package
lcy.tinympi4j.demo; 2 | 3 | import java.io.Serializable; 4 | import java.util.LinkedList; 5 | import java.util.List; 6 | 7 | import lcy.tinympi4j.common.SplitableTask; 8 | 9 | public class GrepSplitedtask implements SplitableTask { 10 | 11 | @Override 12 | public Serializable execute(Serializable[] params) { 13 | 14 | final String[] lines = (String[]) params[0]; 15 | final String word2grep = (String) params[1]; 16 | final List linelist = new LinkedList(); 17 | 18 | for(String line : lines){ 19 | if(line.contains(word2grep)){ 20 | linelist.add(line); 21 | } 22 | } 23 | 24 | return (Serializable) linelist; 25 | 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/lcy/tinympi4j/demo/PrimeSplitedtask.java: -------------------------------------------------------------------------------- 1 | package lcy.tinympi4j.demo; 2 | 3 | import java.io.Serializable; 4 | import java.util.LinkedHashSet; 5 | import java.util.Set; 6 | 7 | import lcy.tinympi4j.common.SplitableTask; 8 | 9 | public class PrimeSplitedtask implements SplitableTask { 10 | 11 | 12 | @Override 13 | public Serializable execute(Serializable[] params) { 14 | 15 | final int fromnumber = (Integer) params[0]; 16 | final int tonumber = (Integer) params[1]; 17 | final Set resultset = new LinkedHashSet(); 18 | 19 | for (int i = fromnumber; i <= tonumber; i++) { 20 | if(Thread.currentThread().isInterrupted()) 21 | return null; 22 | if (isprime(i)) 23 | resultset.add(i); 24 | } 25 | return (Serializable) resultset; 26 | } 27 | 28 | private boolean isprime(int number) { 29 | int n = 2; 30 | while (true) { 31 | if (number % n == 0 && number!=n) 32 | return false; 33 | n++; 34 | if (n > Math.sqrt(number)) 35 | return true; 36 | } 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/lcy/tinympi4j/master/BigTask.java: 
-------------------------------------------------------------------------------- 1 | package lcy.tinympi4j.master; 2 | 3 | import java.io.Serializable; 4 | import java.util.ArrayList; 5 | import java.util.Collection; 6 | import java.util.HashMap; 7 | import java.util.HashSet; 8 | import java.util.Map; 9 | import java.util.Set; 10 | import java.util.concurrent.Callable; 11 | import java.util.concurrent.CompletionService; 12 | import java.util.concurrent.ConcurrentHashMap; 13 | import java.util.concurrent.ExecutionException; 14 | import java.util.concurrent.ExecutorCompletionService; 15 | import java.util.concurrent.ExecutorService; 16 | import java.util.concurrent.Executors; 17 | import java.util.concurrent.TimeUnit; 18 | import java.util.concurrent.locks.ReadWriteLock; 19 | import java.util.concurrent.locks.ReentrantReadWriteLock; 20 | import java.util.logging.Logger; 21 | 22 | import org.apache.commons.io.IOUtils; 23 | import org.apache.commons.lang3.RandomStringUtils; 24 | import org.apache.commons.lang3.SerializationUtils; 25 | 26 | import jodd.http.HttpRequest; 27 | import jodd.http.HttpResponse; 28 | import jodd.util.ClassLoaderUtil; 29 | import lcy.tinympi4j.common.SplitableTask; 30 | 31 | 32 | public class BigTask { 33 | 34 | private static final Logger logger = Logger.getLogger(BigTask.class.getName()); 35 | 36 | private String id; 37 | private String masterurl; 38 | private final Collection totalresultset = new ArrayList(100000); 39 | private final Map slavemap = new HashMap(); 40 | private final Set okslavetaskidset = new HashSet(); 41 | 42 | private static final Map> bigmap = new ConcurrentHashMap>(); 43 | 44 | 45 | 46 | public static BigTask findBigmap(String slavetaskid){ 47 | return bigmap.get(slavetaskid.substring(0, 15)); 48 | 49 | } 50 | 51 | private final ReadWriteLock okslavetaskidset_rwl = new ReentrantReadWriteLock(); 52 | 53 | public void addTask2Slave(String slaveurl, Class clazz, Serializable[] params) { 54 | addTask2Slave(slaveurl, clazz, 
params, 0); 55 | } 56 | 57 | public void addTask2Slave(String slaveurl, Class clazz, Serializable[] params, Integer slaveto) { 58 | final String slavetaskid = String.format("%s-%s", id, RandomStringUtils.random(16, "abcdefghijklmnopqrstuvwxyz")); 59 | slavemap.put(slavetaskid, new Object[]{slaveurl, clazz, params, slaveto}); 60 | logger.info(String.format("distribute subtask to %s, id = %s", slaveurl, slavetaskid)); 61 | } 62 | 63 | 64 | public static BigTask create(String masterurl) { 65 | final BigTask bigtask = new BigTask(); 66 | bigtask.id = RandomStringUtils.random(15, "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); 67 | logger.info(String.format("create a task, id = %s", bigtask.id)); 68 | bigtask.masterurl = masterurl; 69 | bigmap.put(bigtask.id, bigtask); 70 | return bigtask; 71 | } 72 | 73 | 74 | 75 | 76 | public Collection executeAndWait(){ 77 | return executeAndWait(0); 78 | } 79 | 80 | public Collection executeAndWait(final Integer sendandread_to){ 81 | 82 | final ExecutorService es = Executors.newFixedThreadPool(slavemap.size()); 83 | final CompletionService completionService = new ExecutorCompletionService(es); 84 | 85 | 86 | for(final String slavetaskid: slavemap.keySet()){ 87 | 88 | completionService.submit(new Callable() { 89 | 90 | //slavemap.put(slavetaskid, new Object[]{slaveurl, clazz, params, slaveto}); 91 | @Override 92 | public Boolean call() throws Exception { 93 | HttpRequest req = HttpRequest.put((String)slavemap.get(slavetaskid)[0]+"/addtask") 94 | .header("slaveto", slavemap.get(slavetaskid)[3].toString()) 95 | .header("masterurl", masterurl) 96 | .header("slavetaskid", slavetaskid) 97 | .header("classbytes", jodd.util.Base64.encodeToString(IOUtils.toByteArray(ClassLoaderUtil.getClassAsStream((Class)slavemap.get(slavetaskid)[1])))) 98 | .header("classname", ((Class)slavemap.get(slavetaskid)[1]).getName()) 99 | .body(SerializationUtils.serialize((Serializable)(slavemap.get(slavetaskid)[2])), "TINYMPI4J-PARAMS") 100 | .connectionTimeout(4000); 101 | 102 
| if (sendandread_to != null && sendandread_to>0){ 103 | req = req.timeout(sendandread_to*1000); 104 | } 105 | final HttpResponse res = req.send(); 106 | return res.statusCode() == 200; 107 | } 108 | }); 109 | 110 | } 111 | es.shutdown(); 112 | 113 | for(int i=0;i) SerializationUtils.deserialize(IOUtils.toByteArray(req.getInputStream()))); 51 | 52 | final Writer w = resp.getWriter(); 53 | w.write("ok\n"); 54 | w.flush(); 55 | IOUtils.closeQuietly(w); 56 | } 57 | }); 58 | ctx.addServletMappingDecoded("/ok", "ok"); 59 | } 60 | 61 | try { 62 | tomcat.start(); 63 | } catch (LifecycleException e) { 64 | e.printStackTrace(); 65 | } 66 | 67 | 68 | logger.info(String.format("master started at port %d", port)); 69 | new Thread(new Runnable() { 70 | 71 | @Override 72 | public void run() { 73 | tomcat.getServer().await(); 74 | } 75 | }).start(); 76 | } 77 | 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/test/java/TestMaster.java: -------------------------------------------------------------------------------- 1 | import java.util.Collection; 2 | 3 | import lcy.tinympi4j.demo.PrimeSplitedtask; 4 | import lcy.tinympi4j.master.BigTask; 5 | import lcy.tinympi4j.master.TomcatTool; 6 | 7 | public class TestMaster { 8 | 9 | public static void main(String[] args) { 10 | 11 | final int masterport = 8086; 12 | final String masterurl = "http://192.168.1.101:" + masterport; 13 | 14 | TomcatTool.startMasterTomcat(masterport); 15 | 16 | final BigTask bigtask = BigTask.create(masterurl); 17 | 18 | bigtask.addTask2Slave("http://127.0.0.1:1234", PrimeSplitedtask.class, 19 | new Integer[] { 2, 50 }); 20 | bigtask.addTask2Slave("http://127.0.0.1:1235", PrimeSplitedtask.class, 21 | new Integer[] { 51, 100 }); 22 | //bigtask.addTask2Slave("http://192.168.1.103:12346", PrimeSplitedtask.class, new Integer[] { 101, 150 }); 23 | //bigtask.addTask2Slave("http://192.168.1.2:1234", PrimeSplitedtask.class, new Integer[] { 201, 300 }); 24 | final 
Collection resultset = bigtask.executeAndWait(); 25 | 26 | for (int n : resultset){ 27 | System.out.println(n); 28 | } 29 | 30 | } 31 | 32 | } 33 | --------------------------------------------------------------------------------