├── elasticsearchreader ├── src │ └── main │ │ ├── resources │ │ └── plugin.json │ │ ├── java │ │ └── com │ │ │ └── alibaba │ │ │ └── datax │ │ │ └── plugin │ │ │ └── reader │ │ │ └── elasticsearchreader │ │ │ ├── EsTable.java │ │ │ ├── ESReaderErrorCode.java │ │ │ ├── EsField.java │ │ │ ├── gson │ │ │ └── MapTypeAdapter.java │ │ │ ├── Key.java │ │ │ ├── ESClient.java │ │ │ └── EsReader.java │ │ └── assembly │ │ └── package.xml ├── pom.xml └── doc │ └── elasticsearchreader.md ├── elasticsearchwriter ├── src │ └── main │ │ ├── resources │ │ └── plugin.json │ │ ├── java │ │ └── com │ │ │ └── alibaba │ │ │ └── datax │ │ │ └── plugin │ │ │ └── writer │ │ │ └── elasticsearchwriter │ │ │ ├── ESFieldType.java │ │ │ ├── ESWriterErrorCode.java │ │ │ ├── ESColumn.java │ │ │ ├── Key.java │ │ │ ├── ESClient.java │ │ │ └── ESWriter.java │ │ └── assembly │ │ └── package.xml ├── pom.xml └── doc │ └── elasticsearchwriter.md ├── .gitignore ├── README.md ├── package.xml ├── pom.xml └── LICENSE /elasticsearchreader/src/main/resources/plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "elasticsearchreader", 3 | "class": "com.alibaba.datax.plugin.reader.elasticsearchreader.EsReader", 4 | "description": "适用于: 生产环境. 原理: TODO", 5 | "developer": "kesc" 6 | } -------------------------------------------------------------------------------- /elasticsearchwriter/src/main/resources/plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "elasticsearchwriter", 3 | "class": "com.alibaba.datax.plugin.writer.elasticsearchwriter.ESWriter", 4 | "description": "适用于: 生产环境. 
原理: TODO", 5 | "developer": "kesc" 6 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Java template 3 | # Compiled class file 4 | *.class 5 | 6 | # Log file 7 | *.log 8 | 9 | # BlueJ files 10 | *.ctxt 11 | 12 | # Mobile Tools for Java (J2ME) 13 | .mtj.tmp/ 14 | 15 | # Package Files # 16 | *.jar 17 | *.war 18 | *.nar 19 | *.ear 20 | *.zip 21 | *.tar.gz 22 | *.rar 23 | 24 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 25 | hs_err_pid* 26 | 27 | .idea 28 | *.iml 29 | target/ 30 | .target -------------------------------------------------------------------------------- /elasticsearchreader/src/main/java/com/alibaba/datax/plugin/reader/elasticsearchreader/EsTable.java: -------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.reader.elasticsearchreader; 2 | 3 | import lombok.Getter; 4 | import lombok.Setter; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | /** 10 | * @author kesc 11 | * @date 2020-05-11 10:06 12 | */ 13 | @Getter 14 | @Setter 15 | public class EsTable { 16 | private String name; 17 | private String nameCase; 18 | private String filter; 19 | private String deleteFilterKey; 20 | private List column = new ArrayList<>(); 21 | } 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datax-elasticsearch 2 | 3 | datax的elacticsearch读写插件 4 | 5 | # Quick Start 6 | 7 | * 获取阿里datax源码,编译并把datax-common发布到本地仓库 8 | * 使用maven编译: 9 | ``` 10 | mvn -U clean package assembly:assembly -Dmaven.test.skip=true 11 | ``` 12 | * 将编译打包后的插件放到datax目录相应的位置 13 | 14 | 15 | # Support Data Channels 16 | 17 | | 类型 | 数据源 | Reader(读) | Writer(写) |文档| 18 | 
| ------------ | ---------- | :-------: | :-------: |:-------: | 19 | | 无结构化数据存储 | Elasticsearch | √ | √ |[读](https://github.com/Kestrong/datax-elasticsearch/blob/master/elasticsearchreader/doc/elasticsearchreader.md)、[写](https://github.com/Kestrong/datax-elasticsearch/blob/master/elasticsearchwriter/doc/elasticsearchwriter.md)| 20 | 21 | # 参考文档 22 | 23 | * [DataX官方文档](https://github.com/alibaba/DataX) 24 | -------------------------------------------------------------------------------- /elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESFieldType.java: -------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.writer.elasticsearchwriter; 2 | 3 | /** 4 | * Created by xiongfeng.bxf on 17/3/1. 5 | */ 6 | public enum ESFieldType { 7 | ID, 8 | STRING, 9 | TEXT, 10 | KEYWORD, 11 | LONG, 12 | INTEGER, 13 | SHORT, 14 | BYTE, 15 | DOUBLE, 16 | FLOAT, 17 | DATE, 18 | BOOLEAN, 19 | BINARY, 20 | INTEGER_RANGE, 21 | FLOAT_RANGE, 22 | LONG_RANGE, 23 | DOUBLE_RANGE, 24 | DATE_RANGE, 25 | GEO_POINT, 26 | GEO_SHAPE, 27 | 28 | IP, 29 | COMPLETION, 30 | TOKEN_COUNT, 31 | 32 | ARRAY, 33 | OBJECT, 34 | FLATTENED, 35 | NESTED; 36 | 37 | public static ESFieldType getESFieldType(String type) { 38 | if (type == null) { 39 | return null; 40 | } 41 | for (ESFieldType f : ESFieldType.values()) { 42 | if (f.name().compareTo(type.toUpperCase()) == 0) { 43 | return f; 44 | } 45 | } 46 | return null; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | tar.gz 8 | dir 9 | 10 | false 11 | 12 | 13 | 14 | elasticsearchreader/target/datax/ 15 | 16 | **/*.* 17 | 18 | datax 19 | 20 | 21 | 22 | 23 | elasticsearchwriter/target/datax/ 24 | 25 | **/*.* 26 | 27 | datax 28 | 29 | 30 | 31 | 
-------------------------------------------------------------------------------- /elasticsearchreader/src/main/java/com/alibaba/datax/plugin/reader/elasticsearchreader/ESReaderErrorCode.java: -------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.reader.elasticsearchreader; 2 | 3 | import com.alibaba.datax.common.spi.ErrorCode; 4 | 5 | public enum ESReaderErrorCode implements ErrorCode { 6 | BAD_CONFIG_VALUE("ESReader-00", "您配置的值不合法."), 7 | ES_SEARCH_ERROR("ESReader-01", "search出错."), 8 | ES_INDEX_NOT_EXISTS("ESReader-02", "index不存在."), 9 | UNKNOWN_DATA_TYPE("ESReader-03", "无法识别的数据类型."), 10 | COLUMN_CANT_BE_EMPTY("ESReader-04", "column不能为空."), 11 | ; 12 | 13 | private final String code; 14 | private final String description; 15 | 16 | ESReaderErrorCode(String code, String description) { 17 | this.code = code; 18 | this.description = description; 19 | } 20 | 21 | @Override 22 | public String getCode() { 23 | return this.code; 24 | } 25 | 26 | @Override 27 | public String getDescription() { 28 | return this.description; 29 | } 30 | 31 | @Override 32 | public String toString() { 33 | return String.format("Code:[%s], Description:[%s]. 
", this.code, 34 | this.description); 35 | } 36 | } -------------------------------------------------------------------------------- /elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriterErrorCode.java: -------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.writer.elasticsearchwriter; 2 | 3 | import com.alibaba.datax.common.spi.ErrorCode; 4 | 5 | public enum ESWriterErrorCode implements ErrorCode { 6 | BAD_CONFIG_VALUE("ESWriter-00", "您配置的值不合法."), 7 | ES_INDEX_DELETE("ESWriter-01", "删除index错误."), 8 | ES_INDEX_CREATE("ESWriter-02", "创建index错误."), 9 | ES_MAPPINGS("ESWriter-03", "mappings错误."), 10 | ES_INDEX_INSERT("ESWriter-04", "插入数据错误."), 11 | ES_ALIAS_MODIFY("ESWriter-05", "别名修改错误."), 12 | ; 13 | 14 | private final String code; 15 | private final String description; 16 | 17 | ESWriterErrorCode(String code, String description) { 18 | this.code = code; 19 | this.description = description; 20 | } 21 | 22 | @Override 23 | public String getCode() { 24 | return this.code; 25 | } 26 | 27 | @Override 28 | public String getDescription() { 29 | return this.description; 30 | } 31 | 32 | @Override 33 | public String toString() { 34 | return String.format("Code:[%s], Description:[%s]. 
", this.code, 35 | this.description); 36 | } 37 | } -------------------------------------------------------------------------------- /elasticsearchreader/src/main/assembly/package.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | dir 8 | 9 | false 10 | 11 | 12 | src/main/resources 13 | 14 | plugin.json 15 | 16 | plugin/reader/elasticsearchreader 17 | 18 | 19 | target/ 20 | 21 | elasticsearchreader-0.0.1-SNAPSHOT.jar 22 | 23 | plugin/reader/elasticsearchreader 24 | 25 | 26 | 27 | 28 | 29 | false 30 | plugin/reader/elasticsearchreader/libs 31 | runtime 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /elasticsearchwriter/src/main/assembly/package.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | dir 8 | 9 | false 10 | 11 | 12 | src/main/resources 13 | 14 | plugin.json 15 | 16 | plugin/writer/elasticsearchwriter 17 | 18 | 19 | target/ 20 | 21 | elasticsearchwriter-0.0.1-SNAPSHOT.jar 22 | 23 | plugin/writer/elasticsearchwriter 24 | 25 | 26 | 27 | 28 | 29 | false 30 | plugin/writer/elasticsearchwriter/libs 31 | runtime 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /elasticsearchwriter/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | datax-elasticsearch 7 | com.alibaba.datax 8 | 0.0.1-SNAPSHOT 9 | 10 | 11 | 4.0.0 12 | jar 13 | elasticsearchwriter 14 | 0.0.1-SNAPSHOT 15 | 16 | 17 | 18 | 19 | 20 | maven-assembly-plugin 21 | 22 | 23 | src/main/assembly/package.xml 24 | 25 | datax 26 | 27 | 28 | 29 | dwzip 30 | package 31 | 32 | single 33 | 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /elasticsearchreader/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | datax-elasticsearch 7 | 
com.alibaba.datax 8 | 0.0.1-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | elasticsearchreader 13 | 14 | 15 | 16 | ognl 17 | ognl 18 | 3.1.10 19 | 20 | 21 | 22 | 23 | 24 | 25 | maven-assembly-plugin 26 | 27 | 28 | src/main/assembly/package.xml 29 | 30 | datax 31 | 32 | 33 | 34 | dwzip 35 | package 36 | 37 | single 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /elasticsearchreader/src/main/java/com/alibaba/datax/plugin/reader/elasticsearchreader/EsField.java: -------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.reader.elasticsearchreader; 2 | 3 | import org.apache.commons.lang3.StringUtils; 4 | 5 | import java.util.List; 6 | 7 | /** 8 | * @author kesc 9 | * @date 2020-05-09 10:49 10 | */ 11 | public class EsField { 12 | private String name; 13 | private String alias; 14 | private Object value; 15 | private List child; 16 | 17 | public String getName() { 18 | return name; 19 | } 20 | 21 | public void setName(String name) { 22 | this.name = name; 23 | } 24 | 25 | public List getChild() { 26 | return child; 27 | } 28 | 29 | public void setChild(List child) { 30 | this.child = child; 31 | } 32 | 33 | public boolean hasChild() { 34 | return child != null && !child.isEmpty(); 35 | } 36 | 37 | public String getAlias() { 38 | return alias; 39 | } 40 | 41 | public String getFinalName(String nameCase) { 42 | if (StringUtils.isNotBlank(alias)) { 43 | return alias; 44 | } else if (StringUtils.isBlank(name)) { 45 | return ""; 46 | } else if ("UPPERCASE".equalsIgnoreCase(nameCase)) { 47 | return name.toUpperCase(); 48 | } else if ("LOWERCASE".equalsIgnoreCase(nameCase)) { 49 | return name.toLowerCase(); 50 | } 51 | return name; 52 | } 53 | 54 | public void setAlias(String alias) { 55 | this.alias = alias; 56 | } 57 | 58 | public Object getValue() { 59 | return value; 60 | } 61 | 62 | public void setValue(Object value) { 63 | this.value = value; 64 | } 65 | } 
66 | -------------------------------------------------------------------------------- /elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESColumn.java: -------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.writer.elasticsearchwriter; 2 | 3 | import java.util.List; 4 | import java.util.Objects; 5 | 6 | /** 7 | * Created by xiongfeng.bxf on 17/3/2. 8 | */ 9 | public class ESColumn { 10 | 11 | private String name;//: "appkey", 12 | 13 | private String type;//": "TEXT", 14 | 15 | private String timezone; 16 | 17 | private String format; 18 | 19 | private Boolean array; 20 | 21 | private List child; 22 | 23 | private Integer colNo; 24 | 25 | private Boolean ignore; 26 | 27 | public void setName(String name) { 28 | this.name = name; 29 | } 30 | 31 | public void setType(String type) { 32 | this.type = type; 33 | } 34 | 35 | public void setTimeZone(String timezone) { 36 | this.timezone = timezone; 37 | } 38 | 39 | public void setFormat(String format) { 40 | this.format = format; 41 | } 42 | 43 | public String getName() { 44 | return name; 45 | } 46 | 47 | public String getType() { 48 | return type; 49 | } 50 | 51 | public String getTimezone() { 52 | return timezone; 53 | } 54 | 55 | public String getFormat() { 56 | return format; 57 | } 58 | 59 | public void setTimezone(String timezone) { 60 | this.timezone = timezone; 61 | } 62 | 63 | public Boolean isArray() { 64 | return array == null ? 
Boolean.FALSE : array; 65 | } 66 | 67 | public void setArray(Boolean array) { 68 | this.array = array; 69 | } 70 | 71 | public Boolean getArray() { 72 | return array; 73 | } 74 | 75 | public List getChild() { 76 | return child; 77 | } 78 | 79 | public void setChild(List child) { 80 | this.child = child; 81 | } 82 | 83 | public boolean hasChild() { 84 | return child != null && !child.isEmpty(); 85 | } 86 | 87 | public Integer getColNo() { 88 | return colNo; 89 | } 90 | 91 | public void setColNo(Integer colNo) { 92 | this.colNo = colNo; 93 | } 94 | 95 | public Boolean getIgnore() { 96 | return ignore; 97 | } 98 | 99 | public void setIgnore(Boolean ignore) { 100 | this.ignore = ignore; 101 | } 102 | 103 | @Override 104 | public boolean equals(Object o) { 105 | if (this == o) return true; 106 | if (o == null || getClass() != o.getClass()) return false; 107 | ESColumn column = (ESColumn) o; 108 | return colNo == column.colNo && 109 | Objects.equals(name, column.name) && 110 | Objects.equals(type, column.type) && 111 | Objects.equals(timezone, column.timezone) && 112 | Objects.equals(format, column.format) && 113 | Objects.equals(array, column.array) && 114 | Objects.equals(child, column.child); 115 | } 116 | 117 | @Override 118 | public int hashCode() { 119 | return Objects.hash(name, type, timezone, format, array, child, colNo); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /elasticsearchreader/src/main/java/com/alibaba/datax/plugin/reader/elasticsearchreader/gson/MapTypeAdapter.java: -------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.reader.elasticsearchreader.gson; 2 | 3 | import com.google.gson.Gson; 4 | import com.google.gson.TypeAdapter; 5 | import com.google.gson.TypeAdapterFactory; 6 | import com.google.gson.internal.LinkedTreeMap; 7 | import com.google.gson.internal.bind.ObjectTypeAdapter; 8 | import com.google.gson.reflect.TypeToken; 9 | 
import com.google.gson.stream.JsonReader; 10 | import com.google.gson.stream.JsonToken; 11 | import com.google.gson.stream.JsonWriter; 12 | 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | import java.util.Map; 17 | 18 | /** 19 | * @author kesc 20 | * @date 2020-10-13 16:09 21 | */ 22 | public class MapTypeAdapter extends TypeAdapter { 23 | public static final TypeAdapterFactory FACTORY = new TypeAdapterFactory() { 24 | @SuppressWarnings("unchecked") 25 | @Override 26 | public TypeAdapter create(Gson gson, TypeToken type) { 27 | if (type.getRawType() == Map.class) { 28 | return (TypeAdapter) new MapTypeAdapter(gson); 29 | } 30 | return null; 31 | } 32 | }; 33 | 34 | private final Gson gson; 35 | 36 | MapTypeAdapter(Gson gson) { 37 | this.gson = gson; 38 | } 39 | 40 | @Override 41 | public Object read(JsonReader in) throws IOException { 42 | JsonToken token = in.peek(); 43 | switch (token) { 44 | case BEGIN_ARRAY: 45 | List list = new ArrayList<>(); 46 | in.beginArray(); 47 | while (in.hasNext()) { 48 | list.add(read(in)); 49 | } 50 | in.endArray(); 51 | return list; 52 | 53 | case BEGIN_OBJECT: 54 | Map map = new LinkedTreeMap<>(); 55 | in.beginObject(); 56 | while (in.hasNext()) { 57 | map.put(in.nextName(), read(in)); 58 | } 59 | in.endObject(); 60 | return map; 61 | 62 | case STRING: 63 | return in.nextString(); 64 | 65 | case NUMBER: 66 | //改写数字的处理逻辑,将数字值分为整型与浮点型 67 | String numberStr = in.nextString(); 68 | if (numberStr.contains(".") || numberStr.contains("e") 69 | || numberStr.contains("E")) { 70 | return Double.parseDouble(numberStr); 71 | } 72 | long value = Long.parseLong(numberStr); 73 | if (value >= Integer.MIN_VALUE && value <= Integer.MAX_VALUE) { 74 | return (int) value; 75 | } 76 | return value; 77 | 78 | case BOOLEAN: 79 | return in.nextBoolean(); 80 | 81 | case NULL: 82 | in.nextNull(); 83 | return null; 84 | 85 | default: 86 | throw new IllegalStateException(); 87 | } 88 | } 89 | 90 | 91 | 
@Override 92 | public void write(JsonWriter out, Object value) throws IOException { 93 | if (value == null) { 94 | out.nullValue(); 95 | return; 96 | } 97 | 98 | TypeAdapter typeAdapter = gson.getAdapter((Class) value.getClass()); 99 | if (typeAdapter instanceof ObjectTypeAdapter) { 100 | out.beginObject(); 101 | out.endObject(); 102 | return; 103 | } 104 | 105 | typeAdapter.write(out, value); 106 | } 107 | } -------------------------------------------------------------------------------- /elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/Key.java: -------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.writer.elasticsearchwriter; 2 | 3 | import com.alibaba.datax.common.util.Configuration; 4 | import org.apache.commons.lang3.StringUtils; 5 | 6 | import java.util.HashMap; 7 | import java.util.Map; 8 | 9 | public final class Key { 10 | // ---------------------------------------- 11 | // 类型定义 主键字段定义 12 | // ---------------------------------------- 13 | public static final String PRIMARY_KEY_COLUMN_NAME = "pk"; 14 | 15 | public static enum ActionType { 16 | UNKONW, 17 | INDEX, 18 | CREATE, 19 | DELETE, 20 | UPDATE 21 | } 22 | 23 | public static ActionType getActionType(Configuration conf) { 24 | String actionType = conf.getString("actionType", "index"); 25 | if ("index".equals(actionType)) { 26 | return ActionType.INDEX; 27 | } else if ("create".equals(actionType)) { 28 | return ActionType.CREATE; 29 | } else if ("delete".equals(actionType)) { 30 | return ActionType.DELETE; 31 | } else if ("update".equals(actionType)) { 32 | return ActionType.UPDATE; 33 | } else { 34 | return ActionType.UNKONW; 35 | } 36 | } 37 | 38 | 39 | public static String getEndpoint(Configuration conf) { 40 | return conf.getNecessaryValue("endpoint", ESWriterErrorCode.BAD_CONFIG_VALUE); 41 | } 42 | 43 | public static String getAccessID(Configuration conf) { 44 | return conf.getString("accessId", ""); 
45 | } 46 | 47 | public static String getAccessKey(Configuration conf) { 48 | return conf.getString("accessKey", ""); 49 | } 50 | 51 | public static int getBatchSize(Configuration conf) { 52 | return conf.getInt("batchSize", 1000); 53 | } 54 | 55 | public static int getTrySize(Configuration conf) { 56 | return conf.getInt("trySize", 30); 57 | } 58 | 59 | public static int getTimeout(Configuration conf) { 60 | return conf.getInt("timeout", 600000); 61 | } 62 | 63 | public static boolean isCleanup(Configuration conf) { 64 | return conf.getBool("cleanup", false); 65 | } 66 | 67 | public static boolean isDiscovery(Configuration conf) { 68 | return conf.getBool("discovery", false); 69 | } 70 | 71 | public static boolean isCompression(Configuration conf) { 72 | return conf.getBool("compression", true); 73 | } 74 | 75 | public static boolean isMultiThread(Configuration conf) { 76 | return conf.getBool("multiThread", true); 77 | } 78 | 79 | public static String getIndexName(Configuration conf) { 80 | return conf.getNecessaryValue("index", ESWriterErrorCode.BAD_CONFIG_VALUE); 81 | } 82 | 83 | public static String getTypeName(Configuration conf) { 84 | String indexType = conf.getString("indexType"); 85 | if(StringUtils.isBlank(indexType)){ 86 | indexType = conf.getString("type", getIndexName(conf)); 87 | } 88 | return indexType; 89 | } 90 | 91 | 92 | public static boolean isIgnoreWriteError(Configuration conf) { 93 | return conf.getBool("ignoreWriteError", false); 94 | } 95 | 96 | public static boolean isIgnoreParseError(Configuration conf) { 97 | return conf.getBool("ignoreParseError", true); 98 | } 99 | 100 | 101 | public static boolean isHighSpeedMode(Configuration conf) { 102 | if ("highspeed".equals(conf.getString("mode", ""))) { 103 | return true; 104 | } 105 | return false; 106 | } 107 | 108 | public static String getAlias(Configuration conf) { 109 | return conf.getString("alias", ""); 110 | } 111 | 112 | public static boolean isNeedCleanAlias(Configuration conf) { 
113 | String mode = conf.getString("aliasMode", "append"); 114 | if ("exclusive".equals(mode)) { 115 | return true; 116 | } 117 | return false; 118 | } 119 | 120 | public static Map getSettings(Configuration conf) { 121 | return conf.getMap("settings", new HashMap()); 122 | } 123 | 124 | public static String getSplitter(Configuration conf) { 125 | return conf.getString("splitter", "-,-"); 126 | } 127 | 128 | public static boolean getDynamic(Configuration conf) { 129 | return conf.getBool("dynamic", false); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /elasticsearchreader/doc/elasticsearchreader.md: -------------------------------------------------------------------------------- 1 | # DataX ElasticSearchReader 2 | 3 | 4 | --- 5 | 6 | ## 1 快速介绍 7 | 8 | [Datax](https://github.com/alibaba/DataX) 9 | 读取elasticsearch数据的插件 10 | 11 | ## 2 实现原理 12 | 13 | 使用elasticsearch的rest api接口, 批量读取elasticsearch的数据 14 | 15 | ## 3 功能说明 16 | 17 | ### 3.1 配置样例 18 | 19 | #### es索引示例 20 | 21 | ``` 22 | { 23 | "flow_id" : 590000001878, 24 | "taches" : [ 25 | { 26 | "tch_id" : 590000000750, 27 | "flow_id" : 590000001878, 28 | "tch_mod" : 5081 29 | } 30 | ], 31 | } 32 | ``` 33 | 34 | #### job.json 35 | 36 | ``` 37 | { 38 | "core": { 39 | "container": { 40 | "job": { 41 | "reportInterval": 10000 42 | }, 43 | "taskGroup": { 44 | "channel": 5 45 | }, 46 | "trace": { 47 | "enable": "true" 48 | } 49 | } 50 | }, 51 | "job": { 52 | "setting": { 53 | "speed": { 54 | "byte": 10485760 55 | }, 56 | "errorLimit": { 57 | "record": 0, 58 | "percentage": 0.02 59 | } 60 | }, 61 | "content": [ 62 | { 63 | "reader": { 64 | "name": "elasticsearchreader", 65 | "parameter": { 66 | "endpoint": "http://192.168.17.190:9200", 67 | "accessId": "xxxx", 68 | "accessKey": "xxxx", 69 | "index": "test-datax", 70 | "type": "default", 71 | "searchType": "dfs_query_then_fetch", 72 | "headers": { 73 | }, 74 | "scroll": "3m", 75 | "search": [ 76 | { 77 | "size": 5, 78 | 
"query": { 79 | "bool": { 80 | "must": [ 81 | { 82 | "match": { 83 | "_id": "590000001878" 84 | } 85 | } 86 | ] 87 | } 88 | } 89 | } 90 | ], 91 | "table":{ 92 | "name": "TACHE", 93 | "filter": "pk != null", 94 | "nameCase": "UPPERCASE", 95 | "column": [ 96 | { 97 | "name": "flow_id", 98 | "alias": "pk", 99 | }, 100 | { 101 | "name": "taches", 102 | "child": [ 103 | { 104 | "name": "tch_id" 105 | }, 106 | { 107 | "name": "tch_mod" 108 | }, 109 | { 110 | "name": "flow_id" 111 | } 112 | ] 113 | } 114 | ] 115 | } 116 | } 117 | }, 118 | "writer": { 119 | "name": "streamwriter", 120 | "parameter": { 121 | "print": true, 122 | "encoding": "UTF-8" 123 | } 124 | } 125 | } 126 | ] 127 | } 128 | } 129 | ``` 130 | 131 | #### 3.2 参数说明 132 | 133 | * endpoint 134 | * 描述:ElasticSearch的连接地址 135 | * 必选:是 136 | * 默认值:无 137 | 138 | * accessId 139 | * 描述:http auth中的user 140 | * 必选:否 141 | * 默认值:空 142 | 143 | * accessKey 144 | * 描述:http auth中的password 145 | * 必选:否 146 | * 默认值:空 147 | 148 | * index 149 | * 描述:elasticsearch中的index名 150 | * 必选:是 151 | * 默认值:无 152 | 153 | * type 154 | * 描述:elasticsearch中index的type名 155 | * 必选:否 156 | * 默认值:index名 157 | 158 | * timeout 159 | * 描述:客户端超时时间 160 | * 必选:否 161 | * 默认值:600000 162 | 163 | * discovery 164 | * 描述:启用节点发现将(轮询)并定期更新客户机中的服务器列表。 165 | * 必选:否 166 | * 默认值:false 167 | 168 | * compression 169 | * 描述:http请求,开启压缩 170 | * 必选:否 171 | * 默认值:true 172 | 173 | * multiThread 174 | * 描述:http请求,是否有多线程 175 | * 必选:否 176 | * 默认值:true 177 | 178 | * searchType 179 | * 描述:搜索类型 180 | * 必选:否 181 | * 默认值:dfs_query_then_fetch 182 | 183 | * headers 184 | * 描述:http请求头 185 | * 必选:否 186 | * 默认值:空 187 | 188 | * scroll 189 | * 描述:滚动分页配置 190 | * 必选:否 191 | * 默认值:空 192 | 193 | * search 194 | * 描述:json格式api搜索数据体 195 | * 必选:是 196 | * 默认值:[] 197 | 198 | * table 199 | * 描述: 数据读取规则配置,name命名,nameCase全局字段大小写,filter使用ognl表达式进行过滤 200 | * 必选: 是 201 | * 默认值: 无 202 | 203 | * column 204 | * 描述:需要读取的字段,name对应es文档的key,alias为最终记录的字段名如果为空则使用name,value表示字段为常量,child为嵌套对象 205 | * 必选:是 206 | 
* 默认值:无 207 | 208 | 209 | ## 4 性能报告 210 | 211 | 略 212 | 213 | ## 5 约束限制 214 | 215 | * filter使用ognl表达式,根对象为整个table对象,key为column最终写入的名称 216 | 217 | ## 6 FAQ -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | datax-all 5 | com.alibaba.datax 6 | 0.0.1-SNAPSHOT 7 | 8 | 4.0.0 9 | 10 | datax-elasticsearch 11 | pom 12 | 0.0.1-SNAPSHOT 13 | 14 | 15 | elasticsearchwriter 16 | elasticsearchreader 17 | 18 | 19 | 20 | 21 | 22 | com.google.code.gson 23 | gson 24 | 2.8.9 25 | 26 | 27 | commons-codec 28 | commons-codec 29 | 1.13 30 | 31 | 32 | org.apache.httpcomponents 33 | httpclient 34 | 4.5.13 35 | 36 | 37 | com.google.guava 38 | guava 39 | 32.0.0-jre 40 | 41 | 42 | 43 | 44 | 45 | 46 | com.alibaba.datax 47 | datax-common 48 | ${datax-project-version} 49 | 50 | 51 | slf4j-log4j12 52 | org.slf4j 53 | 54 | 55 | 56 | 57 | org.slf4j 58 | slf4j-api 59 | 60 | 61 | org.apache.commons 62 | commons-lang3 63 | 3.9 64 | 65 | 66 | com.alibaba 67 | fastjson 68 | 1.2.83 69 | 70 | 71 | ch.qos.logback 72 | logback-classic 73 | 1.2.13 74 | 75 | 76 | io.searchbox 77 | jest-common 78 | 6.3.1 79 | 80 | 81 | io.searchbox 82 | jest 83 | 6.3.1 84 | 85 | 86 | joda-time 87 | joda-time 88 | 2.9.7 89 | 90 | 91 | junit 92 | junit 93 | 4.13.1 94 | test 95 | 96 | 97 | org.projectlombok 98 | lombok 99 | 1.16.18 100 | compile 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | maven-compiler-plugin 109 | 110 | ${jdk-version} 111 | ${jdk-version} 112 | ${project-sourceEncoding} 113 | 114 | 115 | 116 | 117 | maven-assembly-plugin 118 | 119 | datax 120 | 121 | package.xml 122 | 123 | 124 | 125 | 126 | make-assembly 127 | package 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /elasticsearchreader/src/main/java/com/alibaba/datax/plugin/reader/elasticsearchreader/Key.java: 
-------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.reader.elasticsearchreader; 2 | 3 | import com.alibaba.datax.common.util.Configuration; 4 | import com.alibaba.fastjson.JSON; 5 | import io.searchbox.params.SearchType; 6 | import org.apache.commons.lang3.StringUtils; 7 | 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | 11 | public final class Key { 12 | // ---------------------------------------- 13 | // 类型定义 主键字段定义 14 | // ---------------------------------------- 15 | 16 | public static final String SEARCH_KEY = "search"; 17 | 18 | public static enum ActionType { 19 | UNKONW, 20 | INDEX, 21 | CREATE, 22 | DELETE, 23 | UPDATE 24 | } 25 | 26 | public static SearchType getSearchType(Configuration conf) { 27 | String searchType = conf.getString("searchType", SearchType.DFS_QUERY_THEN_FETCH.toString()); 28 | return SearchType.valueOf(searchType.toUpperCase()); 29 | } 30 | 31 | public static ActionType getActionType(Configuration conf) { 32 | String actionType = conf.getString("actionType", "index"); 33 | if ("index".equals(actionType)) { 34 | return ActionType.INDEX; 35 | } else if ("create".equals(actionType)) { 36 | return ActionType.CREATE; 37 | } else if ("delete".equals(actionType)) { 38 | return ActionType.DELETE; 39 | } else if ("update".equals(actionType)) { 40 | return ActionType.UPDATE; 41 | } else { 42 | return ActionType.UNKONW; 43 | } 44 | } 45 | 46 | 47 | public static String getEndpoint(Configuration conf) { 48 | return conf.getNecessaryValue("endpoint", ESReaderErrorCode.BAD_CONFIG_VALUE); 49 | } 50 | 51 | public static String getAccessID(Configuration conf) { 52 | return conf.getString("accessId", ""); 53 | } 54 | 55 | public static String getAccessKey(Configuration conf) { 56 | return conf.getString("accessKey", ""); 57 | } 58 | 59 | public static int getBatchSize(Configuration conf) { 60 | return conf.getInt("batchSize", 1000); 61 | } 62 | 63 | public static int 
getTrySize(Configuration conf) { 64 | return conf.getInt("trySize", 30); 65 | } 66 | 67 | public static int getTimeout(Configuration conf) { 68 | return conf.getInt("timeout", 600000); 69 | } 70 | 71 | public static boolean isCleanup(Configuration conf) { 72 | return conf.getBool("cleanup", false); 73 | } 74 | 75 | public static boolean isDiscovery(Configuration conf) { 76 | return conf.getBool("discovery", false); 77 | } 78 | 79 | public static boolean isCompression(Configuration conf) { 80 | return conf.getBool("compression", true); 81 | } 82 | 83 | public static boolean isMultiThread(Configuration conf) { 84 | return conf.getBool("multiThread", true); 85 | } 86 | 87 | public static String getIndexName(Configuration conf) { 88 | return conf.getNecessaryValue("index", ESReaderErrorCode.BAD_CONFIG_VALUE); 89 | } 90 | 91 | public static String getTypeName(Configuration conf) { 92 | String indexType = conf.getString("indexType"); 93 | if (StringUtils.isBlank(indexType)) { 94 | indexType = conf.getString("type", getIndexName(conf)); 95 | } 96 | return indexType; 97 | } 98 | 99 | 100 | public static boolean isIgnoreWriteError(Configuration conf) { 101 | return conf.getBool("ignoreWriteError", false); 102 | } 103 | 104 | public static boolean isIgnoreParseError(Configuration conf) { 105 | return conf.getBool("ignoreParseError", true); 106 | } 107 | 108 | 109 | public static boolean isHighSpeedMode(Configuration conf) { 110 | if ("highspeed".equals(conf.getString("mode", ""))) { 111 | return true; 112 | } 113 | return false; 114 | } 115 | 116 | public static String getAlias(Configuration conf) { 117 | return conf.getString("alias", ""); 118 | } 119 | 120 | public static boolean isNeedCleanAlias(Configuration conf) { 121 | String mode = conf.getString("aliasMode", "append"); 122 | if ("exclusive".equals(mode)) { 123 | return true; 124 | } 125 | return false; 126 | } 127 | 128 | public static Map getSettings(Configuration conf) { 129 | return conf.getMap("settings", new 
HashMap()); 130 | } 131 | 132 | public static Map getHeaders(Configuration conf) { 133 | return conf.getMap("headers", new HashMap<>()); 134 | } 135 | 136 | public static String getQuery(Configuration conf) { 137 | return conf.getConfiguration(Key.SEARCH_KEY).toString(); 138 | } 139 | 140 | public static String getSplitter(Configuration conf) { 141 | return conf.getString("splitter", "-,-"); 142 | } 143 | 144 | public static boolean getDynamic(Configuration conf) { 145 | return conf.getBool("dynamic", false); 146 | } 147 | 148 | public static String getScroll(Configuration conf) { 149 | return conf.getString("scroll"); 150 | } 151 | 152 | public static EsTable getTable(Configuration conf) { 153 | String column = conf.getString("table"); 154 | return JSON.parseObject(column, EsTable.class); 155 | } 156 | 157 | } 158 | -------------------------------------------------------------------------------- /elasticsearchwriter/doc/elasticsearchwriter.md: -------------------------------------------------------------------------------- 1 | # DataX ElasticSearchWriter 2 | 3 | 4 | --- 5 | 6 | ## 1 快速介绍 7 | 8 | [Datax](https://github.com/alibaba/DataX) 9 | 数据导入elasticsearch的插件 10 | 11 | ## 2 实现原理 12 | 13 | 使用elasticsearch的rest api接口, 批量把从reader读入的数据写入elasticsearch 14 | 15 | ## 3 功能说明 16 | 17 | ### 3.1 配置样例 18 | 19 | #### job.json 20 | 21 | 当flatToNested为false(默认)时,写入字段与读入字段按顺序一一对应,当flatToNested为true时必须配置colNo属性,显示指出写入字段对应读入字段的位置,从0开始。child为嵌套对象的字段配置,flatToNested为true时有效。 22 | ``` 23 | { 24 | "job": { 25 | "setting": { 26 | "speed": { 27 | "channel": 1 28 | } 29 | }, 30 | "content": [ 31 | { 32 | "reader": { 33 | ... 
34 | }, 35 | "writer": { 36 | "name": "elasticsearchwriter", 37 | "parameter": { 38 | "flatToNested": true, 39 | "endpoint": "http://xxx:9999", 40 | "accessId": "xxxx", 41 | "accessKey": "xxxx", 42 | "index": "test-1", 43 | "type": "default", 44 | "cleanup": true, 45 | "settings": {"index" :{"number_of_shards": 1, "number_of_replicas": 0}}, 46 | "discovery": false, 47 | "batchSize": 1000, 48 | "splitter": ",", 49 | "column": [ 50 | {"name": "uuid", "type": "text", "colNo": 0}, 51 | {"name": "pk", "type": "id", "colNo": 1}, 52 | { "name": "col_ip","type": "ip" , "colNo": 2}, 53 | { "name": "col_double","type": "double" , "colNo": 3}, 54 | { "name": "col_long","type": "long" , "colNo": 4}, 55 | { "name": "col_integer","type": "integer" , "colNo": 5}, 56 | { "name": "col_keyword", "type": "keyword" , "colNo": 6}, 57 | { "name": "col_text", "type": "text", "analyzer": "ik_max_word", "colNo": 7}, 58 | { "name": "col_geo_point", "type": "geo_point" , "colNo": 8}, 59 | { "name": "col_date", "type": "date", "format": "yyyy-MM-dd HH:mm:ss", "colNo": 9}, 60 | { "name": "col_object1", "type": "object" }, 61 | { "name": "col_object2", "type": "object" }, 62 | { "name": "col_integer_array", "type":"integer", "array":true, "colNo": 10}, 63 | { "name": "col_geo_shape", "type":"geo_shape", "tree": "quadtree", "precision": "10m", "colNo": 11}, 64 | { "name": "col_nested", "type": "nested", "child": [{"name": "id_2", "type": "id","ignore":true, "colNo": 12},{ "name": "col_1", "type": "text" , "colNo": 13}] }, 65 | { "name": "col_flattened", "type": "flattened", "child": [{"name": "id_3", "type": "id","ignore":true, "colNo": 14},{ "name": "col_1", "type": "text" , "colNo": 15}] } 66 | ] 67 | } 68 | } 69 | } 70 | ] 71 | } 72 | } 73 | ``` 74 | 75 | #### 3.2 参数说明 76 | 77 | * flatToNested 78 | * 描述:一对多转成嵌套对象,数据必须按主表主键排序,分批时要保证子表数据完整,嵌套对象必须配置id字段用于分组(不想写入es可以设置ignore为true) 79 | * 必选:否 80 | * 默认值:false 81 | 82 | * endpoint 83 | * 描述:ElasticSearch的连接地址 84 | * 必选:是 85 | * 默认值:无 86 | 87 | * 
accessId 88 | * 描述:http auth中的user 89 | * 必选:否 90 | * 默认值:空 91 | 92 | * accessKey 93 | * 描述:http auth中的password 94 | * 必选:否 95 | * 默认值:空 96 | 97 | * index 98 | * 描述:elasticsearch中的index名 99 | * 必选:是 100 | * 默认值:无 101 | 102 | * type 103 | * 描述:elasticsearch中index的type名 104 | * 必选:否 105 | * 默认值:index名 106 | 107 | * cleanup 108 | * 描述:是否删除原表 109 | * 必选:否 110 | * 默认值:false 111 | 112 | * batchSize 113 | * 描述:每次批量数据的条数 114 | * 必选:否 115 | * 默认值:1000 116 | 117 | * trySize 118 | * 描述:失败后重试的次数 119 | * 必选:否 120 | * 默认值:30 121 | 122 | * timeout 123 | * 描述:客户端超时时间 124 | * 必选:否 125 | * 默认值:600000 126 | 127 | * discovery 128 | * 描述:启用节点发现将(轮询)并定期更新客户机中的服务器列表。 129 | * 必选:否 130 | * 默认值:false 131 | 132 | * compression 133 | * 描述:http请求,开启压缩 134 | * 必选:否 135 | * 默认值:true 136 | 137 | * multiThread 138 | * 描述:http请求,是否有多线程 139 | * 必选:否 140 | * 默认值:true 141 | 142 | * ignoreWriteError 143 | * 描述:忽略写入错误,不重试,继续写入 144 | * 必选:否 145 | * 默认值:false 146 | 147 | * ignoreParseError 148 | * 描述:忽略解析数据格式错误,继续写入 149 | * 必选:否 150 | * 默认值:true 151 | 152 | * alias 153 | * 描述:数据导入完成后写入别名 154 | * 必选:否 155 | * 默认值:无 156 | 157 | * aliasMode 158 | * 描述:数据导入完成后增加别名的模式,append(增加模式), exclusive(只留这一个) 159 | * 必选:否 160 | * 默认值:append 161 | 162 | * settings 163 | * 描述:创建index时候的settings, 与elasticsearch官方相同 164 | * 必选:否 165 | * 默认值:无 166 | 167 | * splitter 168 | * 描述:如果插入数据是array,就使用指定分隔符 169 | * 必选:否 170 | * 默认值:-,- 171 | 172 | * column 173 | * 描述:elasticsearch所支持的字段类型,样例中包含了全部 174 | * 必选:是 175 | 176 | * dynamic 177 | * 描述: 不使用datax的mappings,使用es自己的自动mappings 178 | * 必选: 否 179 | * 默认值: false 180 | 181 | 182 | 183 | ## 4 性能报告 184 | 185 | ### 4.1 环境准备 186 | 187 | * 总数据量 1kw条数据, 每条0.1kb 188 | * 1个shard, 0个replica 189 | * 不加id,这样默认是append_only模式,不检查版本,插入速度会有20%左右的提升 190 | 191 | #### 4.1.1 输入数据类型(streamreader) 192 | 193 | ``` 194 | {"value": "1.1.1.1", "type": "string"}, 195 | {"value": 19890604.0, "type": "double"}, 196 | {"value": 19890604, "type": "long"}, 197 | {"value": 19890604, "type": "long"}, 198 | {"value": 
"hello world", "type": "string"}, 199 | {"value": "hello world", "type": "string"}, 200 | {"value": "41.12,-71.34", "type": "string"}, 201 | {"value": "2017-05-25", "type": "string"}, 202 | ``` 203 | 204 | #### 4.1.2 输出数据类型(eswriter) 205 | 206 | ``` 207 | { "name": "col_ip","type": "ip" }, 208 | { "name": "col_double","type": "double" }, 209 | { "name": "col_long","type": "long" }, 210 | { "name": "col_integer","type": "integer" }, 211 | { "name": "col_keyword", "type": "keyword" }, 212 | { "name": "col_text", "type": "text"}, 213 | { "name": "col_geo_point", "type": "geo_point" }, 214 | { "name": "col_date", "type": "date"} 215 | ``` 216 | 217 | #### 4.1.2 机器参数 218 | 219 | 1. cpu: 32 Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz 220 | 2. mem: 128G 221 | 3. net: 千兆双网卡 222 | 223 | #### 4.1.3 DataX jvm 参数 224 | 225 | -Xms1024m -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError 226 | 227 | ### 4.2 测试报告 228 | 229 | | 通道数| 批量提交行数| DataX速度(Rec/s)|DataX流量(MB/s)| 230 | |--------|--------| --------|--------| 231 | | 4| 256| 11013| 0.828| 232 | | 4| 1024| 19417| 1.43| 233 | | 4| 4096| 23923| 1.76| 234 | | 4| 8172| 24449| 1.80| 235 | | 8| 256| 21459| 1.58| 236 | | 8| 1024| 37037| 2.72| 237 | | 8| 4096| 45454| 3.34| 238 | | 8| 8172| 45871| 3.37| 239 | | 16| 1024| 67567| 4.96| 240 | | 16| 4096| 78125| 5.74| 241 | | 16| 8172| 77519| 5.69| 242 | | 32| 1024| 94339| 6.93| 243 | | 32| 4096| 96153| 7.06| 244 | | 64| 1024| 91743| 6.74| 245 | 246 | ### 4.3 测试总结 247 | 248 | * 最好的结果是32通道,每次传4096,如果单条数据很大, 请适当减少批量数,防止oom 249 | * 当然这个很容易水平扩展,而且es也是分布式的,多设置几个shard也可以水平扩展 250 | 251 | ## 5 约束限制 252 | 253 | * 如果导入id,这样数据导入失败也会重试,重新导入也仅仅是覆盖,保证数据一致性 254 | * 如果不导入id,就是append_only模式,elasticsearch自动生成id,速度会提升20%左右,但数据无法修复,适合日志型数据(对数据精度要求不高的) -------------------------------------------------------------------------------- /elasticsearchreader/src/main/java/com/alibaba/datax/plugin/reader/elasticsearchreader/ESClient.java: -------------------------------------------------------------------------------- 1 | 
package com.alibaba.datax.plugin.reader.elasticsearchreader;

import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import io.searchbox.action.Action;
import io.searchbox.client.JestClient;
import io.searchbox.client.JestClientFactory;
import io.searchbox.client.JestResult;
import io.searchbox.client.config.HttpClientConfig.Builder;
import io.searchbox.core.ClearScroll;
import io.searchbox.core.Search;
import io.searchbox.core.SearchResult;
import io.searchbox.core.SearchScroll;
import io.searchbox.indices.IndicesExists;
import io.searchbox.indices.aliases.*;
import io.searchbox.params.SearchType;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpHost;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Thin wrapper around the Jest HTTP client used by the elasticsearch reader plugin.
 * Handles client construction, index-existence checks, search/scroll requests and
 * alias maintenance. Not thread-safe: {@link #createClient} must be called once
 * before any other method, and {@link #closeJestClient} releases the connection pool.
 *
 * @author kesc
 * @date 2020-04-14 10:32
 */
public class ESClient {
    private static final Logger log = LoggerFactory.getLogger(ESClient.class);

    /** Sentinel returned by {@link #getStatus} when the response carries no "status" field. */
    private static final int UNKNOWN_STATUS = 600;

    private JestClient jestClient;

    public JestClient getClient() {
        return jestClient;
    }

    /**
     * Builds and stores the underlying {@link JestClient}.
     *
     * @param endpoint    elasticsearch REST endpoint, e.g. {@code http://host:9200}
     * @param user        HTTP basic-auth user; blank/null disables authentication
     * @param passwd      HTTP basic-auth password; blank/null disables authentication
     * @param multiThread enable the multi-threaded Apache HTTP connection manager
     * @param readTimeout socket read timeout in milliseconds
     * @param compression enable HTTP request compression
     * @param discovery   enable periodic node discovery (5-minute polling)
     */
    public void createClient(String endpoint,
                             String user,
                             String passwd,
                             boolean multiThread,
                             int readTimeout,
                             boolean compression,
                             boolean discovery) {
        JestClientFactory factory = new JestClientFactory();
        // NOTE(review): new HttpHost(endpoint) treats the full endpoint URL as a host name.
        // Kept as-is to mirror the writer plugin's behavior — confirm upstream before changing.
        Builder httpClientConfig = new Builder(endpoint)
                .setPreemptiveAuth(new HttpHost(endpoint))
                .multiThreaded(multiThread)
                .connTimeout(30000)
                .readTimeout(readTimeout)
                .maxTotalConnection(200)
                .requestCompressionEnabled(compression)
                .discoveryEnabled(discovery)
                .discoveryFrequency(5L, TimeUnit.MINUTES);

        // FIX: the original check !("".equals(user) || "".equals(passwd)) only rejected
        // empty strings, so a null user/password still enabled auth with null credentials.
        if (StringUtils.isNotBlank(user) && StringUtils.isNotBlank(passwd)) {
            httpClientConfig.defaultCredentials(user, passwd);
        }

        factory.setHttpClientConfig(httpClientConfig.build());
        jestClient = factory.getObject();
    }

    /**
     * Checks whether an index exists.
     *
     * @return true if the HEAD request succeeds; false on 404 or any other failure
     *         (non-404 failures, e.g. 401 unauthorized, are logged as warnings)
     * @throws Exception if the HTTP round-trip itself fails
     */
    public boolean indicesExists(String indexName) throws Exception {
        JestResult rst = jestClient.execute(new IndicesExists.Builder(indexName).build());
        if (rst.isSucceeded()) {
            return true;
        }
        switch (rst.getResponseCode()) {
            case 404:
                // index genuinely absent — not an error
                break;
            case 401:
                // 无权访问 (unauthorized) — fall through and warn
            default:
                log.warn(rst.getErrorMessage());
                break;
        }
        return false;
    }

    /**
     * Executes a search request, optionally opening a scroll context.
     *
     * @param query   the raw query DSL body
     * @param scroll  scroll keep-alive (e.g. "1m"); blank disables scrolling
     * @param headers extra HTTP headers attached to the request
     */
    public SearchResult search(String query,
                               SearchType searchType,
                               String index,
                               String type,
                               String scroll,
                               Map<String, Object> headers) throws IOException {
        Search.Builder searchBuilder = new Search.Builder(query)
                .setSearchType(searchType)
                .addIndex(index).addType(type).setHeader(headers);
        if (StringUtils.isNotBlank(scroll)) {
            searchBuilder.setParameter("scroll", scroll);
        }
        return jestClient.execute(searchBuilder.build());
    }

    /** Fetches the next page of an open scroll context. */
    public JestResult scroll(String scrollId, String scroll) throws Exception {
        SearchScroll.Builder builder = new SearchScroll.Builder(scrollId, scroll);
        return execute(builder.build());
    }

    /** Releases a scroll context; failures are logged and swallowed (best-effort cleanup). */
    public void clearScroll(String scrollId) {
        ClearScroll.Builder builder = new ClearScroll.Builder().addScrollId(scrollId);
        try {
            execute(builder.build());
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }

    /**
     * Executes an arbitrary Jest action. A failed response is logged at debug level
     * but still returned — error handling is the caller's responsibility, matching
     * the original contract (no exception on an unsuccessful response).
     */
    public JestResult execute(Action<JestResult> clientRequest) throws Exception {
        JestResult rst = jestClient.execute(clientRequest);
        if (!rst.isSucceeded()) {
            // FIX: was an empty if-block with a commented-out warn — dead code.
            log.debug(rst.getErrorMessage());
        }
        return rst;
    }

    /** @return the numeric "status" field of the response, or {@link #UNKNOWN_STATUS} if absent. */
    public Integer getStatus(JestResult rst) {
        JsonObject jsonObject = rst.getJsonObject();
        if (jsonObject.has("status")) {
            return jsonObject.get("status").getAsInt();
        }
        return UNKNOWN_STATUS;
    }

    /** @return true if the response looks like a bulk response (has an "items" array). */
    public boolean isBulkResult(JestResult rst) {
        JsonObject jsonObject = rst.getJsonObject();
        return jsonObject.has("items");
    }

    /**
     * Points {@code aliasname} at {@code indexname}. When {@code needClean} is true,
     * the alias is removed from every other index it currently targets (exclusive mode).
     *
     * @return false if the final alias-modification request fails; true otherwise
     * @throws IOException if an HTTP round-trip fails
     */
    public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException {
        GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build();
        AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build();
        JestResult rst = jestClient.execute(getAliases);
        log.info(rst.getJsonString());
        List<AliasMapping> removals = new ArrayList<>();
        if (rst.isSucceeded()) {
            JsonParser jp = new JsonParser();
            JsonObject jo = (JsonObject) jp.parse(rst.getJsonString());
            for (Map.Entry<String, JsonElement> entry : jo.entrySet()) {
                String tindex = entry.getKey();
                if (indexname.equals(tindex)) {
                    continue;
                }
                AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build();
                String s = new Gson().toJson(m.getData());
                log.info(s);
                if (needClean) {
                    removals.add(m);
                }
            }
        }

        ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(removals).setParameter("master_timeout", "5m").build();
        rst = jestClient.execute(modifyAliases);
        if (!rst.isSucceeded()) {
            log.error(rst.getErrorMessage());
            return false;
        }
        return true;
    }

    /**
     * 关闭JestClient客户端 (shuts down the Jest client and its connection pool).
     */
    public void closeJestClient() {
        if (jestClient != null) {
            jestClient.shutdownClient();
        }
    }
}
--------------------------------------------------------------------------------
/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESClient.java:
-------------------------------------------------------------------------------- 1 | package com.alibaba.datax.plugin.writer.elasticsearchwriter; 2 | 3 | import com.google.gson.Gson; 4 | import com.google.gson.JsonElement; 5 | import com.google.gson.JsonObject; 6 | import com.google.gson.JsonParser; 7 | import io.searchbox.action.Action; 8 | import io.searchbox.client.JestClient; 9 | import io.searchbox.client.JestClientFactory; 10 | import io.searchbox.client.JestResult; 11 | import io.searchbox.client.config.HttpClientConfig; 12 | import io.searchbox.client.config.HttpClientConfig.Builder; 13 | import io.searchbox.core.Bulk; 14 | import io.searchbox.indices.CreateIndex; 15 | import io.searchbox.indices.DeleteIndex; 16 | import io.searchbox.indices.IndicesExists; 17 | import io.searchbox.indices.aliases.*; 18 | import io.searchbox.indices.mapping.PutMapping; 19 | import org.apache.http.HttpHost; 20 | import org.slf4j.Logger; 21 | import org.slf4j.LoggerFactory; 22 | 23 | import java.io.IOException; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.concurrent.TimeUnit; 28 | 29 | /** 30 | * Created by xiongfeng.bxf on 17/2/8. 
31 | */ 32 | public class ESClient { 33 | private static final Logger log = LoggerFactory.getLogger(ESClient.class); 34 | 35 | private JestClient jestClient; 36 | 37 | public JestClient getClient() { 38 | return jestClient; 39 | } 40 | 41 | public void createClient(String endpoint, 42 | String user, 43 | String passwd, 44 | boolean multiThread, 45 | int readTimeout, 46 | boolean compression, 47 | boolean discovery) { 48 | 49 | JestClientFactory factory = new JestClientFactory(); 50 | Builder httpClientConfig = new HttpClientConfig 51 | .Builder(endpoint) 52 | .setPreemptiveAuth(new HttpHost(endpoint)) 53 | .multiThreaded(multiThread) 54 | .connTimeout(30000) 55 | .readTimeout(readTimeout) 56 | .maxTotalConnection(200) 57 | .requestCompressionEnabled(compression) 58 | .discoveryEnabled(discovery) 59 | .discoveryFrequency(5l, TimeUnit.MINUTES); 60 | 61 | if (!("".equals(user) || "".equals(passwd))) { 62 | httpClientConfig.defaultCredentials(user, passwd); 63 | } 64 | 65 | factory.setHttpClientConfig(httpClientConfig.build()); 66 | 67 | jestClient = factory.getObject(); 68 | } 69 | 70 | public boolean indicesExists(String indexName) throws Exception { 71 | boolean isIndicesExists = false; 72 | JestResult rst = jestClient.execute(new IndicesExists.Builder(indexName).build()); 73 | if (rst.isSucceeded()) { 74 | isIndicesExists = true; 75 | } else { 76 | switch (rst.getResponseCode()) { 77 | case 404: 78 | isIndicesExists = false; 79 | break; 80 | case 401: 81 | // 无权访问 82 | default: 83 | log.warn(rst.getErrorMessage()); 84 | break; 85 | } 86 | } 87 | return isIndicesExists; 88 | } 89 | 90 | public boolean deleteIndex(String indexName) throws Exception { 91 | log.info("delete index " + indexName); 92 | if (indicesExists(indexName)) { 93 | JestResult rst = execute(new DeleteIndex.Builder(indexName).build()); 94 | if (!rst.isSucceeded()) { 95 | return false; 96 | } 97 | } else { 98 | log.info("index cannot found, skip delete " + indexName); 99 | } 100 | return true; 101 | 
} 102 | 103 | public boolean createIndex(String indexName, String typeName, 104 | Object mappings, String settings, boolean dynamic) throws Exception { 105 | JestResult rst = null; 106 | if (!indicesExists(indexName)) { 107 | log.info("create index " + indexName); 108 | rst = jestClient.execute( 109 | new CreateIndex.Builder(indexName) 110 | .settings(settings) 111 | .setParameter("master_timeout", "5m") 112 | .build() 113 | ); 114 | //index_already_exists_exception 115 | if (!rst.isSucceeded()) { 116 | if (getStatus(rst) == 400) { 117 | log.info(String.format("index [%s] already exists", indexName)); 118 | return true; 119 | } else { 120 | log.error(rst.getErrorMessage()); 121 | return false; 122 | } 123 | } else { 124 | log.info(String.format("create [%s] index success", indexName)); 125 | } 126 | } 127 | 128 | int idx = 0; 129 | while (idx < 5) { 130 | if (indicesExists(indexName)) { 131 | break; 132 | } 133 | Thread.sleep(2000); 134 | idx ++; 135 | } 136 | if (idx >= 5) { 137 | return false; 138 | } 139 | 140 | if (dynamic) { 141 | log.info("ignore mappings"); 142 | return true; 143 | } 144 | log.info("create mappings for " + indexName + " " + mappings); 145 | rst = jestClient.execute(new PutMapping.Builder(indexName, typeName, mappings) 146 | .setParameter("master_timeout", "5m").build()); 147 | if (!rst.isSucceeded()) { 148 | if (getStatus(rst) == 400) { 149 | log.info(String.format("index [%s] mappings already exists", indexName)); 150 | } else { 151 | log.error(rst.getErrorMessage()); 152 | return false; 153 | } 154 | } else { 155 | log.info(String.format("index [%s] put mappings success", indexName)); 156 | } 157 | return true; 158 | } 159 | 160 | public JestResult execute(Action clientRequest) throws Exception { 161 | JestResult rst = null; 162 | rst = jestClient.execute(clientRequest); 163 | if (!rst.isSucceeded()) { 164 | //log.warn(rst.getErrorMessage()); 165 | } 166 | return rst; 167 | } 168 | 169 | public Integer getStatus(JestResult rst) { 170 | 
JsonObject jsonObject = rst.getJsonObject(); 171 | if (jsonObject.has("status")) { 172 | return jsonObject.get("status").getAsInt(); 173 | } 174 | return 600; 175 | } 176 | 177 | public boolean isBulkResult(JestResult rst) { 178 | JsonObject jsonObject = rst.getJsonObject(); 179 | return jsonObject.has("items"); 180 | } 181 | 182 | 183 | public boolean alias(String indexname, String aliasname, boolean needClean) throws IOException { 184 | GetAliases getAliases = new GetAliases.Builder().addIndex(aliasname).build(); 185 | AliasMapping addAliasMapping = new AddAliasMapping.Builder(indexname, aliasname).build(); 186 | JestResult rst = jestClient.execute(getAliases); 187 | log.info(rst.getJsonString()); 188 | List list = new ArrayList(); 189 | if (rst.isSucceeded()) { 190 | JsonParser jp = new JsonParser(); 191 | JsonObject jo = (JsonObject)jp.parse(rst.getJsonString()); 192 | for(Map.Entry entry : jo.entrySet()){ 193 | String tindex = entry.getKey(); 194 | if (indexname.equals(tindex)) { 195 | continue; 196 | } 197 | AliasMapping m = new RemoveAliasMapping.Builder(tindex, aliasname).build(); 198 | String s = new Gson().toJson(m.getData()); 199 | log.info(s); 200 | if (needClean) { 201 | list.add(m); 202 | } 203 | } 204 | } 205 | 206 | ModifyAliases modifyAliases = new ModifyAliases.Builder(addAliasMapping).addAlias(list).setParameter("master_timeout", "5m").build(); 207 | rst = jestClient.execute(modifyAliases); 208 | if (!rst.isSucceeded()) { 209 | log.error(rst.getErrorMessage()); 210 | return false; 211 | } 212 | return true; 213 | } 214 | 215 | public JestResult bulkInsert(Bulk.Builder bulk, int trySize) throws Exception { 216 | // es_rejected_execution_exception 217 | // illegal_argument_exception 218 | // cluster_block_exception 219 | JestResult rst = null; 220 | rst = jestClient.execute(bulk.build()); 221 | if (!rst.isSucceeded()) { 222 | log.warn(rst.getErrorMessage()); 223 | } 224 | return rst; 225 | } 226 | 227 | /** 228 | * 关闭JestClient客户端 229 | * 230 | */ 
231 | public void closeJestClient() { 232 | if (jestClient != null) { 233 | jestClient.shutdownClient(); 234 | } 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a file or class name and description of purpose be
included on the same "printed page" as the copyright notice for easier
identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------------------------------------------------------------------------------
/elasticsearchreader/src/main/java/com/alibaba/datax/plugin/reader/elasticsearchreader/EsReader.java:
--------------------------------------------------------------------------------
package com.alibaba.datax.plugin.reader.elasticsearchreader;

import com.alibaba.datax.common.element.*;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.exception.ExceptionTracker;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.statistics.PerfRecord;
import com.alibaba.datax.common.statistics.PerfTrace;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.elasticsearchreader.gson.MapTypeAdapter;
import com.alibaba.fastjson.JSON;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonElement;
import io.searchbox.client.JestResult;
import io.searchbox.core.SearchResult;
import io.searchbox.params.SearchType;
import ognl.Ognl;
import ognl.OgnlException;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.util.*;

/**
 * DataX reader plugin for Elasticsearch.
 *
 * <p>The {@link Job} splits into one task per query listed under the "search"
 * key; each {@link Task} runs the query via the Jest client, scrolls through
 * the hits, flattens each hit's {@code _source} according to the configured
 * {@link EsTable} column tree (nested lists are join-expanded into multiple
 * rows), and sends the resulting records to the DataX framework.
 *
 * @author kesc mail:492167585@qq.com
 * @date 2020-04-14 10:32
 */
@SuppressWarnings(value = {"unchecked"})
public class EsReader extends Reader {

    public static class Job extends Reader.Job {
        private static final Logger log = LoggerFactory.getLogger(Job.class);
        private Configuration conf = null;

        @Override
        public void prepare() {
            /*
             * Executed exactly once before tasks run: verify connectivity and
             * that the target index exists, failing the job early otherwise.
             */
            ESClient esClient = new ESClient();
            esClient.createClient(Key.getEndpoint(conf),
                    Key.getAccessID(conf),
                    Key.getAccessKey(conf),
                    false,
                    300000,
                    false,
                    false);

            String indexName = Key.getIndexName(conf);
            String typeName = Key.getTypeName(conf);
            log.info("index:[{}], type:[{}]", indexName, typeName);
            try {
                boolean isIndicesExists = esClient.indicesExists(indexName);
                if (!isIndicesExists) {
                    throw new IOException(String.format("index[%s] not exist", indexName));
                }
            } catch (Exception ex) {
                throw DataXException.asDataXException(ESReaderErrorCode.ES_INDEX_NOT_EXISTS, ex.toString());
            } finally {
                // Close in finally: the original leaked the client when the
                // existence check threw.
                esClient.closeJestClient();
            }
        }

        @Override
        public void init() {
            this.conf = super.getPluginJobConf();
        }

        @Override
        public List<Configuration> split(int adviceNumber) {
            // One task per configured query: each clone carries a single search body.
            List<Configuration> configurations = new ArrayList<>();
            List<Object> search = conf.getList(Key.SEARCH_KEY, Object.class);
            for (Object query : search) {
                Configuration clone = conf.clone();
                clone.set(Key.SEARCH_KEY, query);
                configurations.add(clone);
            }
            return configurations;
        }

        @Override
        public void post() {
            super.post();
        }

        @Override
        public void destroy() {
            log.info("============elasticsearch reader job destroy=================");
        }
    }

    public static class Task extends Reader.Task {
        // Fixed: was LoggerFactory.getLogger(Job.class), which mislabeled task logs.
        private static final Logger log = LoggerFactory.getLogger(Task.class);

        private Configuration conf;
        ESClient esClient = null;
        Gson gson = null;
        private String index;
        private String type;
        private SearchType searchType;
        // Extra HTTP headers forwarded to the search request; shape depends on
        // Key.getHeaders — assumed String-keyed, TODO confirm against ESClient.search.
        private Map headers;
        private String query;
        private String scroll;
        private EsTable table;

        @Override
        public void prepare() {
            esClient.createClient(Key.getEndpoint(conf),
                    Key.getAccessID(conf),
                    Key.getAccessKey(conf),
                    Key.isMultiThread(conf),
                    Key.getTimeout(conf),
                    Key.isCompression(conf),
                    Key.isDiscovery(conf));
        }

        @Override
        public void init() {
            this.conf = super.getPluginJobConf();
            this.esClient = new ESClient();
            this.gson = new GsonBuilder().registerTypeAdapterFactory(MapTypeAdapter.FACTORY).create();
            this.index = Key.getIndexName(conf);
            this.type = Key.getTypeName(conf);
            this.searchType = Key.getSearchType(conf);
            this.headers = Key.getHeaders(conf);
            this.query = Key.getQuery(conf);
            this.scroll = Key.getScroll(conf);
            this.table = Key.getTable(conf);
            if (table == null || table.getColumn() == null || table.getColumn().isEmpty()) {
                throw DataXException.asDataXException(ESReaderErrorCode.COLUMN_CANT_BE_EMPTY, "请检查job的elasticsearchreader插件下parameter是否配置了table参数");
            }
        }

        /**
         * Runs the initial search, emits its hits, then follows the scroll
         * cursor until an empty page is returned. The scroll context is always
         * cleared on the server in the finally block.
         */
        @Override
        public void startRead(RecordSender recordSender) {
            PerfTrace.getInstance().addTaskDetails(super.getTaskId(), index);
            // Initial search request.
            PerfRecord queryPerfRecord = new PerfRecord(super.getTaskGroupId(), super.getTaskId(), PerfRecord.PHASE.SQL_QUERY);
            queryPerfRecord.start();
            SearchResult searchResult;
            try {
                searchResult = esClient.search(query, searchType, index, type, scroll, headers);
            } catch (Exception e) {
                throw DataXException.asDataXException(ESReaderErrorCode.ES_SEARCH_ERROR, e);
            }
            if (!searchResult.isSucceeded()) {
                throw DataXException.asDataXException(ESReaderErrorCode.ES_SEARCH_ERROR, searchResult.getResponseCode() + ":" + searchResult.getErrorMessage());
            }
            queryPerfRecord.end();
            // Emit the first page of hits.
            PerfRecord allResultPerfRecord = new PerfRecord(super.getTaskGroupId(), super.getTaskId(), PerfRecord.PHASE.RESULT_NEXT_ALL);
            allResultPerfRecord.start();
            this.transportRecords(recordSender, searchResult);
            allResultPerfRecord.end();
            // Follow the scroll cursor, if the server returned one.
            JsonElement scrollIdElement = searchResult.getJsonObject().get("_scroll_id");
            if (scrollIdElement == null) {
                return;
            }
            String scrollId = scrollIdElement.getAsString();
            log.info("scroll id:{}", scrollId);
            try {
                boolean hasElement = true;
                while (hasElement) {
                    queryPerfRecord.start();
                    JestResult scroll = esClient.scroll(scrollId, this.scroll);
                    queryPerfRecord.end();
                    if (!scroll.isSucceeded()) {
                        throw DataXException.asDataXException(ESReaderErrorCode.ES_SEARCH_ERROR, String.format("scroll[id=%s] search error,code:%s,msg:%s", scrollId, scroll.getResponseCode(), scroll.getErrorMessage()));
                    }
                    allResultPerfRecord.start();
                    hasElement = this.transportRecords(recordSender, parseSearchResult(scroll));
                    allResultPerfRecord.end();
                }
            } catch (DataXException dxe) {
                throw dxe;
            } catch (Exception e) {
                throw DataXException.asDataXException(ESReaderErrorCode.ES_SEARCH_ERROR, e);
            } finally {
                esClient.clearScroll(scrollId);
            }
        }

        /** Wraps a raw JestResult into a SearchResult so hit sources can be extracted. */
        private SearchResult parseSearchResult(JestResult jestResult) {
            if (jestResult == null) {
                return null;
            }
            SearchResult searchResult = new SearchResult(gson);
            searchResult.setSucceeded(jestResult.isSucceeded());
            searchResult.setResponseCode(jestResult.getResponseCode());
            searchResult.setPathToResult(jestResult.getPathToResult());
            searchResult.setJsonString(jestResult.getJsonString());
            searchResult.setJsonObject(jestResult.getJsonObject());
            searchResult.setErrorMessage(jestResult.getErrorMessage());
            return searchResult;
        }

        /** Pre-populates every leaf column with null so rows have a stable column set. */
        private void setDefaultValue(List<EsField> column, Map<String, Object> data) {
            for (EsField field : column) {
                if (field.hasChild()) {
                    setDefaultValue(field.getChild(), data);
                } else {
                    data.putIfAbsent(field.getFinalName(table.getNameCase()), null);
                }
            }
        }

        /**
         * Flattens one _source map into row maps following the column tree.
         * Nested Maps are merged into the current row; nested Lists produce a
         * cartesian join: every existing row is duplicated per list element.
         */
        private void getPathSource(List<Map<String, Object>> result, Map<String, Object> source, List<EsField> column, Map<String, Object> parent) {
            if (source.isEmpty()) {
                return;
            }
            // Leaf fields first: copy the value (or the configured default) onto the row.
            for (EsField esField : column) {
                if (!esField.hasChild()) {
                    parent.put(esField.getFinalName(table.getNameCase()), source.getOrDefault(esField.getName(), esField.getValue()));
                }
            }
            for (EsField esField : column) {
                if (!esField.hasChild()) {
                    continue;
                }
                Object value = source.get(esField.getName());
                if (value instanceof Map) {
                    getPathSource(result, (Map<String, Object>) value, esField.getChild(), parent);
                } else if (value instanceof List) {
                    List<Map<String, Object>> valueList = (List<Map<String, Object>>) value;
                    if (valueList.isEmpty()) {
                        continue;
                    }
                    // Join-expand: rebuild the result set as (existing rows) x (list items).
                    List<Map<String, Object>> joinResults = new ArrayList<>();
                    List<Map<String, Object>> copyResult = new ArrayList<>(result);
                    result.clear();
                    for (Map<String, Object> joinParent : copyResult) {
                        for (Map<String, Object> item : valueList) {
                            Map<String, Object> childData = new LinkedHashMap<>(joinParent);
                            joinResults.add(childData);
                            getPathSource(joinResults, item, esField.getChild(), childData);
                            result.addAll(joinResults);
                            joinResults.clear();
                        }
                    }
                    copyResult.clear();
                }
            }
        }

        /** Evaluates an OGNL expression against the row, falling back to defaultValue. */
        private Object getOgnlValue(Object expression, Map<String, Object> root, Object defaultValue) {
            try {
                if (!(expression instanceof String)) {
                    return defaultValue;
                }
                Object value = Ognl.getValue(expression.toString(), root);
                if (value == null) {
                    return defaultValue;
                }
                return value;
            } catch (OgnlException e) {
                return defaultValue;
            }
        }

        /**
         * Applies the optional row filter. The deleteFilterKey column is removed
         * from the row first (it only exists to drive the filter expression).
         * A blank or failing filter admits the row.
         */
        private boolean filter(String filter, String deleteFilterKey, Map<String, Object> record) {
            if (StringUtils.isNotBlank(deleteFilterKey)) {
                record.remove(deleteFilterKey);
            }
            if (StringUtils.isBlank(filter)) {
                return true;
            }
            return (Boolean) getOgnlValue(filter, record, Boolean.TRUE);
        }

        /**
         * Converts one page of hits into records.
         *
         * @return true if the page contained at least one hit (i.e. keep scrolling)
         */
        private boolean transportRecords(RecordSender recordSender, SearchResult result) {
            if (result == null) {
                return false;
            }
            List<String> sources = result.getSourceAsStringList();
            if (sources == null) {
                sources = Collections.emptyList();
            }
            List<Map<String, Object>> recordMaps = new ArrayList<>();
            for (String source : sources) {
                List<EsField> column = table.getColumn();
                if (column == null || column.isEmpty()) {
                    continue;
                }
                Map<String, Object> parent = new LinkedHashMap<>((int) (column.size() * 1.5));
                setDefaultValue(column, parent);
                recordMaps.add(parent);
                getPathSource(recordMaps, gson.fromJson(source, Map.class), column, parent);
                this.transportOneRecord(table, recordSender, recordMaps);
                recordMaps.clear();
            }
            return !sources.isEmpty();
        }

        /** Sends every flattened row that passes the filter and has at least one non-null value. */
        private void transportOneRecord(EsTable table, RecordSender recordSender, List<Map<String, Object>> recordMaps) {
            for (Map<String, Object> o : recordMaps) {
                boolean allow = filter(table.getFilter(), table.getDeleteFilterKey(), o);
                if (allow && o.entrySet().stream().anyMatch(x -> x.getValue() != null)) {
                    Record record = buildRecord(recordSender, o);
                    recordSender.sendToWriter(record);
                }
            }
        }

        /**
         * Builds a DataX record from one row map, in the map's iteration order.
         * Values that fail conversion are skipped and the record is reported as
         * dirty with the accumulated stack traces.
         */
        private Record buildRecord(RecordSender recordSender, Map<String, Object> source) {
            Record record = recordSender.createRecord();
            boolean hasDirty = false;
            StringBuilder sb = new StringBuilder();
            for (Map.Entry<String, Object> entry : source.entrySet()) {
                try {
                    // Use entry.getValue() directly instead of re-looking the key up.
                    record.addColumn(getColumn(entry.getValue()));
                } catch (Exception e) {
                    hasDirty = true;
                    sb.append(ExceptionTracker.trace(e));
                }
            }
            if (hasDirty) {
                getTaskPluginCollector().collectDirtyRecord(record, sb.toString());
            }
            return record;
        }

        /** Maps a raw JSON value onto the matching DataX Column type. */
        private Column getColumn(Object value) {
            Column col;
            if (value == null) {
                col = new StringColumn();
            } else if (value instanceof String) {
                col = new StringColumn((String) value);
            } else if (value instanceof Integer) {
                col = new LongColumn(((Integer) value).longValue());
            } else if (value instanceof Long) {
                col = new LongColumn((Long) value);
            } else if (value instanceof Byte) {
                col = new LongColumn(((Byte) value).longValue());
            } else if (value instanceof Short) {
                col = new LongColumn(((Short) value).longValue());
            } else if (value instanceof Double) {
                col = new DoubleColumn(BigDecimal.valueOf((Double) value));
            } else if (value instanceof Float) {
                col = new DoubleColumn(BigDecimal.valueOf(((Float) value).doubleValue()));
            } else if (value instanceof Date) {
                col = new DateColumn((Date) value);
            } else if (value instanceof Boolean) {
                col = new BoolColumn((Boolean) value);
            } else if (value instanceof byte[]) {
                col = new BytesColumn((byte[]) value);
            } else if (value instanceof List || value instanceof Map || value.getClass().isArray()) {
                // Complex values are serialized to a JSON string.
                // Fixed: was "value instanceof Array" — java.lang.reflect.Array is a
                // final utility class that is never an instance type, so that branch
                // could never match a real array.
                col = new StringColumn(JSON.toJSONString(value));
            } else {
                throw DataXException.asDataXException(ESReaderErrorCode.UNKNOWN_DATA_TYPE, "type:" + value.getClass().getName());
            }
            return col;
        }

        @Override
        public void post() {
            super.post();
        }

        @Override
        public void destroy() {
            log.info("============elasticsearch reader taskGroup[{}] taskId[{}] destroy=================", super.getTaskGroupId(), super.getTaskId());
            esClient.closeJestClient();
        }
    }
}
--------------------------------------------------------------------------------
/elasticsearchwriter/src/main/java/com/alibaba/datax/plugin/writer/elasticsearchwriter/ESWriter.java:
--------------------------------------------------------------------------------
package com.alibaba.datax.plugin.writer.elasticsearchwriter;

import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.RetryUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
import io.searchbox.client.JestResult;
import io.searchbox.core.Bulk;
import io.searchbox.core.BulkResult;
import io.searchbox.core.Index;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import
java.io.IOException;
import java.util.*;
import java.util.concurrent.Callable;

/**
 * DataX writer plugin for Elasticsearch.
 *
 * <p>The {@link Job} optionally recreates the index, generates the type
 * mapping from the configured column list, and (after the run) switches an
 * alias. Each {@link Task} converts incoming DataX records into documents
 * and bulk-indexes them via the Jest client, either flat (one record = one
 * document) or, when {@code flatToNested} is set, by grouping consecutive
 * records with the same id into one nested document.
 */
public class ESWriter extends Writer {
    private final static String WRITE_COLUMNS = "write_columns";

    public static class Job extends Writer.Job {
        private static final Logger log = LoggerFactory.getLogger(Job.class);

        private Configuration conf = null;
        private boolean flatToNested;

        @Override
        public void init() {
            this.conf = super.getPluginJobConf();
            flatToNested = conf.getBool("flatToNested", false);
        }

        @Override
        public void prepare() {
            /*
             * Executed exactly once before tasks run: optionally drop the old
             * index, then create the index with the generated mapping.
             */
            ESClient esClient = new ESClient();
            esClient.createClient(Key.getEndpoint(conf),
                    Key.getAccessID(conf),
                    Key.getAccessKey(conf),
                    false,
                    300000,
                    false,
                    false);

            String indexName = Key.getIndexName(conf);
            String typeName = Key.getTypeName(conf);
            boolean dynamic = Key.getDynamic(conf);
            String mappings = genMappings(typeName);
            String settings = JSONObject.toJSONString(
                    Key.getSettings(conf)
            );
            log.info("index:[{}], type:[{}], mappings:[{}]", indexName, typeName, mappings);

            try {
                boolean isIndicesExists = esClient.indicesExists(indexName);
                if (Key.isCleanup(this.conf) && isIndicesExists) {
                    esClient.deleteIndex(indexName);
                }
                // Force creation; the client ignores an already-existing index internally.
                if (!esClient.createIndex(indexName, typeName, mappings, settings, dynamic)) {
                    throw new IOException("create index or mapping failed");
                }
            } catch (Exception ex) {
                throw DataXException.asDataXException(ESWriterErrorCode.ES_MAPPINGS, ex.toString());
            } finally {
                // Close in finally: the original leaked the client on the error path.
                esClient.closeJestClient();
            }
        }

        /**
         * Recursively turns the user's column config into ESColumn metadata
         * (written back for the tasks) and the ES "properties" mapping tree.
         */
        private void genRecMapping(List<ESColumn> columnList, List<Object> column, Map<String, Object> propMap) {
            if (column != null) {
                for (Object col : column) {
                    JSONObject jo = JSONObject.parseObject(col.toString());
                    String colName = jo.getString("name");
                    String colTypeStr = jo.getString("type");
                    Integer colNo = jo.getInteger("colNo");
                    Boolean ignore = jo.getBoolean("ignore");
                    if (colTypeStr == null) {
                        throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " column must have type");
                    }
                    ESFieldType colType = ESFieldType.getESFieldType(colTypeStr);
                    if (colType == null) {
                        throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, col.toString() + " unsupported type");
                    }

                    ESColumn columnItem = new ESColumn();
                    columnItem.setColNo(colNo);
                    columnItem.setIgnore(ignore == null ? Boolean.FALSE : ignore);
                    if (colName.equals(Key.PRIMARY_KEY_COLUMN_NAME)) {
                        // Backward compatibility: the legacy pk column name implies type id.
                        colType = ESFieldType.ID;
                        colTypeStr = "id";
                    }

                    columnItem.setName(colName);
                    columnItem.setType(colTypeStr);

                    Boolean array = jo.getBoolean("array");
                    if (array != null) {
                        columnItem.setArray(array);
                    }

                    if (colType == ESFieldType.ID) {
                        columnList.add(columnItem);
                        // id columns carry no mapping properties.
                        continue;
                    }

                    Map<String, Object> field = new HashMap<>();
                    field.put("type", colTypeStr);
                    //https://www.elastic.co/guide/en/elasticsearch/reference/5.2/breaking_50_mapping_changes.html#_literal_index_literal_property
                    // https://www.elastic.co/guide/en/elasticsearch/guide/2.x/_deep_dive_on_doc_values.html#_disabling_doc_values
                    field.put("doc_values", jo.getBoolean("doc_values"));
                    field.put("ignore_above", jo.getInteger("ignore_above"));
                    field.put("index", jo.getBoolean("index"));

                    switch (colType) {
                        case STRING:
                            // Legacy "string" type for pre-ES5 clusters.
                            break;
                        case KEYWORD:
                            // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-search-speed.html#_warm_up_global_ordinals
                            field.put("eager_global_ordinals", jo.getBoolean("eager_global_ordinals"));
                            break;
                        case TEXT:
                            field.put("analyzer", jo.getString("analyzer"));
                            // Disk/index-speed tuning knobs:
                            // https://www.elastic.co/guide/en/elasticsearch/reference/current/tune-for-disk-usage.html
                            field.put("norms", jo.getBoolean("norms"));
                            // Fixed: index_options is a string enum (docs/freqs/positions/offsets);
                            // getBoolean always returned null for those values.
                            field.put("index_options", jo.getString("index_options"));
                            break;
                        case DATE:
                            columnItem.setTimeZone(jo.getString("timezone"));
                            columnItem.setFormat(jo.getString("format"));
                            // Dates are normalized to zoned ISO strings later, so no
                            // explicit "format" needs to be sent to ES.
                            break;
                        case GEO_SHAPE:
                            field.put("tree", jo.getString("tree"));
                            field.put("precision", jo.getString("precision"));
                            break;
                        default:
                            break;
                    }
                    columnList.add(columnItem);
                    JSONArray child = jo.getJSONArray("child");
                    if (flatToNested && child != null && child.size() > 0) {
                        List<ESColumn> childColumns = new ArrayList<>();
                        columnItem.setChild(childColumns);
                        Map<String, Object> o = (Map<String, Object>) propMap.computeIfAbsent(colName, k -> new HashMap<>());
                        o.put("type", colTypeStr);
                        Map<String, Object> innerField = (Map<String, Object>) o.computeIfAbsent("properties", k -> new HashMap<>());
                        genRecMapping(childColumns, child, innerField);
                    } else {
                        propMap.put(colName, field);
                    }
                }
            }
        }

        /**
         * Builds the JSON mapping for the type and stashes the parsed column
         * list into the configuration under WRITE_COLUMNS for the tasks.
         */
        private String genMappings(String typeName) {
            Map<String, Object> propMap = new HashMap<>();
            List<ESColumn> columnList = new ArrayList<>();

            List<Object> column = conf.getList("column");
            genRecMapping(columnList, column, propMap);

            String columnJson = JSON.toJSONString(columnList);
            conf.set(WRITE_COLUMNS, columnJson);

            log.info(columnJson);

            Map<String, Object> rootMappings = new HashMap<>();
            Map<String, Object> typeMappings = new HashMap<>();
            typeMappings.put("properties", propMap);
            rootMappings.put(typeName, typeMappings);

            String mappings = JSON.toJSONString(rootMappings);

            if (mappings == null || "".equals(mappings)) {
                throw DataXException.asDataXException(ESWriterErrorCode.BAD_CONFIG_VALUE, "must have mappings");
            }

            return mappings;
        }

        @Override
        public List<Configuration> split(int mandatoryNumber) {
            List<Configuration> configurations = new ArrayList<>(mandatoryNumber);
            for (int i = 0; i < mandatoryNumber; i++) {
                configurations.add(conf);
            }
            return configurations;
        }

        @Override
        public void post() {
            // After all tasks finish, optionally point the configured alias at the index.
            ESClient esClient = new ESClient();
            esClient.createClient(Key.getEndpoint(conf),
                    Key.getAccessID(conf),
                    Key.getAccessKey(conf),
                    false,
                    300000,
                    false,
                    false);
            try {
                String alias = Key.getAlias(conf);
                if (!"".equals(alias)) {
                    log.info("alias [{}] to [{}]", alias, Key.getIndexName(conf));
                    try {
                        esClient.alias(Key.getIndexName(conf), alias, Key.isNeedCleanAlias(conf));
                    } catch (IOException e) {
                        throw DataXException.asDataXException(ESWriterErrorCode.ES_ALIAS_MODIFY, e);
                    }
                }
            } finally {
                // Fixed: the client was never closed in post().
                esClient.closeJestClient();
            }
        }

        @Override
        public void destroy() {

        }
    }

    public static class Task extends Writer.Task {

        // Fixed: was LoggerFactory.getLogger(Job.class), which mislabeled task logs.
        private static final Logger log = LoggerFactory.getLogger(Task.class);

        private Configuration conf;

        ESClient esClient = null;
        private List<ESFieldType> typeList;
        private List<ESColumn> columnList;

        private int trySize;
        private int batchSize;
        private String index;
        private String type;
        private String splitter;
        private boolean flatToNested;

        @Override
        public void init() {
            this.conf = super.getPluginJobConf();
            index = Key.getIndexName(conf);
            type = Key.getTypeName(conf);

            trySize = Key.getTrySize(conf);
            batchSize = Key.getBatchSize(conf);
            splitter = Key.getSplitter(conf);
            // Column metadata was serialized by Job.genMappings under WRITE_COLUMNS.
            columnList = JSON.parseObject(this.conf.getString(WRITE_COLUMNS), new TypeReference<List<ESColumn>>() {
            });

            typeList = new ArrayList<>();

            for (ESColumn col : columnList) {
                typeList.add(ESFieldType.getESFieldType(col.getType()));
            }
            this.flatToNested = conf.getBool("flatToNested", false);
            esClient = new ESClient();
        }

        @Override
        public void prepare() {
            esClient.createClient(Key.getEndpoint(conf),
                    Key.getAccessID(conf),
                    Key.getAccessKey(conf),
                    Key.isMultiThread(conf),
                    Key.getTimeout(conf),
                    Key.isCompression(conf),
                    Key.isDiscovery(conf));
        }

        /**
         * Folds one record into the nested-document buffer keyed by document id.
         *
         * @return the id the record was grouped under (synthetic "_<uuid>" when
         *         no id column is configured)
         */
        private String getRecColumn(Map<String, Map<ESColumn, Object>> records, List<ESColumn> columns, Record record) {
            String id = null;
            for (ESColumn column : columns) {
                if (column.getType().equals(ESFieldType.ID.name().toLowerCase())) {
                    id = record.getColumn(column.getColNo()).asString();
                    break;
                }
            }
            if (id == null) {
                // No id column: synthesize one so the row still gets its own document.
                id = "_" + UUID.randomUUID().toString();
            }
            Map<ESColumn, Object> data = records.computeIfAbsent(id, k -> new HashMap<>());
            for (ESColumn column : columns) {
                if (column.hasChild()) {
                    Map<String, Map<ESColumn, Object>> childData = (Map<String, Map<ESColumn, Object>>) data.computeIfAbsent(column, k -> new HashMap<>());
                    getRecColumn(childData, column.getChild(), record);
                } else {
                    data.put(column, record.getColumn(column.getColNo()));
                }
            }
            return id;
        }

        /**
         * Nested write path: consecutive records sharing an id are merged into
         * one document; the buffer is flushed whenever the id changes and the
         * buffer has reached batchSize.
         */
        public void startWriteNested(RecordReceiver recordReceiver) {
            long total = 0;
            Record record;
            Map<String, Map<ESColumn, Object>> records = new HashMap<>(this.batchSize, 1);
            String lastId = "***";
            while ((record = recordReceiver.getFromReader()) != null) {
                String id = getRecColumn(records, columnList, record);
                if (!id.equals(lastId) && records.size() >= this.batchSize) {
                    total += doBatchInsert(records);
                    records.clear();
                }
                lastId = id;
            }
            if (!records.isEmpty()) {
                total += doBatchInsert(records);
                records.clear();
            }

            String msg = String.format("task end, write size :%d", total);
            getTaskPluginCollector().collectMessage("writesize", String.valueOf(total));
            log.info(msg);
            esClient.closeJestClient();
        }

        /**
         * Converts one column value into the document map according to its ES type.
         *
         * @return the document id when this column is the id column, else null
         */
        private String setColumnValue(ESFieldType columnType, ESColumn esColumn, Column column, Map<String, Object> data, String columnName) {
            String id = null;
            if (!columnType.equals(ESFieldType.ID) && esColumn.getIgnore()) {
                return id;
            }
            // Array columns arrive as a delimited string and are split on the configured splitter.
            if (esColumn.isArray() != null && esColumn.isArray()) {
                String[] dataList = Optional.ofNullable(column.asString()).orElse(splitter).split(splitter);
                if (!columnType.equals(ESFieldType.DATE)) {
                    data.put(columnName, dataList);
                } else {
                    for (int pos = 0; pos < dataList.length; pos++) {
                        dataList[pos] = getDateStr(esColumn, column);
                    }
                    data.put(columnName, dataList);
                }
            } else {
                switch (columnType) {
                    case ID:
                        // id is always null at this point, so plain assignment suffices
                        // (the old "id += ..." branch was unreachable dead code).
                        id = column.asString();
                        if (!esColumn.getIgnore()) {
                            data.put(columnName, id);
                        }
                        break;
                    case DATE:
                        try {
                            String dateStr = getDateStr(esColumn, column);
                            data.put(columnName, dateStr);
                        } catch (Exception e) {
                            log.error(String.format("时间类型解析失败 [%s:%s] exception: %s", columnName, column.toString(), e.toString()));
                        }
                        break;
                    case KEYWORD:
                    case STRING:
                    case TEXT:
                    case IP:
                    case GEO_POINT:
                        data.put(columnName, column.asString());
                        break;
                    case BOOLEAN:
                        data.put(columnName, column.asBoolean());
                        break;
                    case BYTE:
                    case BINARY:
                        data.put(columnName, column.asBytes());
                        break;
                    case LONG:
                        data.put(columnName, column.asLong());
                        break;
                    case INTEGER:
                        data.put(columnName, column.asBigInteger());
                        break;
                    case SHORT:
                        data.put(columnName, column.asBigInteger());
                        break;
                    case FLOAT:
                    case DOUBLE:
                        data.put(columnName, column.asDouble());
                        break;
                    case NESTED:
                    case OBJECT:
                    case FLATTENED:
                    case GEO_SHAPE:
                        // Complex types are expected to arrive as JSON text.
                        data.put(columnName, JSON.parse(column.asString()));
                        break;
                    default:
                        log.error("类型错误:不支持的类型:" + columnType + " " + columnName);
                }
            }
            return id;
        }

        /**
         * Recursively materializes a buffered nested document (ESColumn -> value
         * tree built by getRecColumn) into the plain map sent to ES.
         */
        private void getRecData(Map<ESColumn, Object> records, Map<String, Object> data) {
            for (Map.Entry<ESColumn, Object> record : records.entrySet()) {
                Object value = record.getValue();
                if (value == null || (value instanceof String && "".equals(value.toString()))) {
                    continue;
                }
                if (value instanceof Column) {
                    Column column = (Column) value;
                    ESFieldType columnType = ESFieldType.getESFieldType(record.getKey().getType());
                    String columnName = record.getKey().getName();
                    setColumnValue(columnType, record.getKey(), column, data, columnName);
                } else if (value instanceof Map) {
                    if (!record.getKey().isArray()) {
                        // Single nested object: merge all child groups into one sub-map.
                        Map<String, Object> o = (Map<String, Object>) data.computeIfAbsent(record.getKey().getName(), k -> new HashMap<>());
                        Map<String, Map<ESColumn, Object>> childValue = (Map<String, Map<ESColumn, Object>>) value;
                        for (Map.Entry<String, Map<ESColumn, Object>> childRecord : childValue.entrySet()) {
                            getRecData(childRecord.getValue(), o);
                        }
                    } else {
                        // Nested array: each child group becomes one element; empty
                        // elements are dropped, and an all-empty array is removed.
                        List<Map<String, Object>> o = (List<Map<String, Object>>) data.computeIfAbsent(record.getKey().getName(), k -> new ArrayList<>());
                        Map<String, Map<ESColumn, Object>> childValue = (Map<String, Map<ESColumn, Object>>) value;
                        for (Map.Entry<String, Map<ESColumn, Object>> childRecord : childValue.entrySet()) {
                            Map<String, Object> childData = new HashMap<>();
                            getRecData(childRecord.getValue(), childData);
                            if (!childData.isEmpty() && childData.values().stream().anyMatch(Objects::nonNull)) {
                                o.add(childData);
                            }
                        }
                        if (o.isEmpty()) {
                            data.remove(record.getKey().getName());
                        }
                    }
                }
            }
        }

        /**
         * Bulk-indexes the buffered nested documents with retry.
         *
         * @return number of documents successfully written
         */
        private long doBatchInsert(final Map<String, Map<ESColumn, Object>> writerBuffer) {
            final Bulk.Builder bulkaction = new Bulk.Builder().defaultIndex(this.index).defaultType(this.type);
            for (Map.Entry<String, Map<ESColumn, Object>> entry : writerBuffer.entrySet()) {
                Map<String, Object> data = new HashMap<>();
                getRecData(entry.getValue(), data);
                if (entry.getKey().startsWith("_")) {
                    // Synthetic id: let ES assign the document id.
                    bulkaction.addAction(new Index.Builder(data).build());
                } else {
                    bulkaction.addAction(new Index.Builder(data).id(entry.getKey()).build());
                }
            }
            try {
                return RetryUtil.executeWithRetry(new Callable<Integer>() {
                    @Override
                    public Integer call() throws Exception {
                        JestResult jestResult = esClient.bulkInsert(bulkaction, 1);
                        if (jestResult.isSucceeded()) {
                            return writerBuffer.size();
                        }

                        String msg = String.format("response code: [%d] error :[%s]", jestResult.getResponseCode(), jestResult.getErrorMessage());
                        log.warn(msg);
                        if (esClient.isBulkResult(jestResult)) {
                            BulkResult brst = (BulkResult) jestResult;
                            List<BulkResult.BulkResultItem> failedItems = brst.getFailedItems();
                            for (BulkResult.BulkResultItem item : failedItems) {
                                if (item.status != 400) {
                                    // 400 BAD_REQUEST: only data/parse errors may be ignored;
                                    // any other request failure must abort.
                                    throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s", item.status, item.error));
                                } else {
                                    // Parse errors are ignored by default unless configured otherwise.
                                    if (!Key.isIgnoreParseError(conf)) {
                                        throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s, config not ignoreParseError so throw this error", item.status, item.error));
                                    }
                                }
                            }

                            List<BulkResult.BulkResultItem> items = brst.getItems();
                            for (BulkResult.BulkResultItem item : items) {
                                if (item.error != null && !"".equals(item.error)) {
                                    log.error(String.format("id:[%s],index:%s,status:[%d], error: %s", item.id, item.index, item.status, item.error));
                                }
                            }
                            return writerBuffer.size() - brst.getFailedItems().size();
                        } else {
                            Integer status = esClient.getStatus(jestResult);
                            if (status == 429) { //TOO_MANY_REQUESTS
                                log.warn("server response too many requests, so auto reduce speed");
                            }
                            throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, jestResult.getErrorMessage());
                        }
                    }
                }, trySize, 60000L, true);
            } catch (Exception e) {
                if (Key.isIgnoreWriteError(this.conf)) {
                    log.warn(String.format("重试[%d]次写入失败,忽略该错误,继续写入!", trySize));
                } else {
                    throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, e);
                }
            }
            return 0;
        }

        @Override
        public void startWrite(RecordReceiver recordReceiver) {
            if (this.flatToNested) {
                this.startWriteNested(recordReceiver);
            } else {
                this.startWriteFlat(recordReceiver);
            }
        }

        /** Flat write path: one incoming record becomes one document. */
        public void startWriteFlat(RecordReceiver recordReceiver) {
            List<Record> writerBuffer = new ArrayList<>(this.batchSize);
            Record record;
            long total = 0;
            while ((record = recordReceiver.getFromReader()) != null) {
                writerBuffer.add(record);
                if (writerBuffer.size() >= this.batchSize) {
                    total += doBatchInsert(writerBuffer);
                    writerBuffer.clear();
                }
            }

            if (!writerBuffer.isEmpty()) {
                total += doBatchInsert(writerBuffer);
                writerBuffer.clear();
            }

            String msg = String.format("task end, write size :%d", total);
            getTaskPluginCollector().collectMessage("writesize", String.valueOf(total));
            log.info(msg);
            esClient.closeJestClient();
        }

        /**
         * Normalizes a date column to a zoned ISO-8601 string using the column's
         * configured format/timezone; non-DATE columns without a format pass
         * through as plain strings.
         */
        private String getDateStr(ESColumn esColumn, Column column) {
            if (column.getRawData() == null) {
                return null;
            }
            DateTimeZone dtz = DateTimeZone.getDefault();
            if (esColumn.getTimezone() != null) {
                // All timezone ids: http://www.joda.org/joda-time/timezones.html
                dtz = DateTimeZone.forID(esColumn.getTimezone());
            }
            if (column.getType() != Column.Type.DATE && esColumn.getFormat() != null) {
                DateTimeFormatter formatter = DateTimeFormat.forPattern(esColumn.getFormat());
                DateTime date = formatter.withZone(dtz).parseDateTime(column.asString());
                return date.toString();
            } else if (column.getType() == Column.Type.DATE) {
                DateTime date = new DateTime(column.asLong(), dtz);
                return date.toString();
            } else {
                return column.asString();
            }
        }

        /**
         * Bulk-indexes a batch of flat records with retry; failed items with a
         * 400 status are reported as dirty records instead of aborting.
         *
         * @return number of documents successfully written
         */
        private long doBatchInsert(final List<Record> writerBuffer) {
            final Bulk.Builder bulkaction = new Bulk.Builder().defaultIndex(this.index).defaultType(this.type);
            for (Record record : writerBuffer) {
                Map<String, Object> data = new HashMap<>();
                String id = null;
                for (int i = 0; i < record.getColumnNumber(); i++) {
                    Column column = record.getColumn(i);
                    String columnName = columnList.get(i).getName();
                    ESFieldType columnType = typeList.get(i);
                    String tempId = setColumnValue(columnType, columnList.get(i), column, data, columnName);
                    if (tempId != null) {
                        id = tempId;
                    }
                }

                if (id == null) {
                    // No id column: let ES assign the document id.
                    bulkaction.addAction(new Index.Builder(data).build());
                } else {
                    bulkaction.addAction(new Index.Builder(data).id(id).build());
                }
            }

            try {
                return RetryUtil.executeWithRetry(new Callable<Integer>() {
                    @Override
                    public Integer call() throws Exception {
                        JestResult jestResult = esClient.bulkInsert(bulkaction, 1);
                        if (jestResult.isSucceeded()) {
                            return writerBuffer.size();
                        }

                        String msg = String.format("response code: [%d] error :[%s]", jestResult.getResponseCode(), jestResult.getErrorMessage());
                        log.warn(msg);
                        if (esClient.isBulkResult(jestResult)) {
                            BulkResult brst = (BulkResult) jestResult;
                            List<BulkResult.BulkResultItem> failedItems = brst.getFailedItems();
                            for (BulkResult.BulkResultItem item : failedItems) {
                                if (item.status != 400) {
                                    // 400 BAD_REQUEST: only data/parse errors may be ignored;
                                    // any other request failure must abort.
                                    throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s", item.status, item.error));
                                } else {
                                    // Parse errors are ignored by default unless configured otherwise.
                                    if (!Key.isIgnoreParseError(conf)) {
                                        throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, String.format("status:[%d], error: %s, config not ignoreParseError so throw this error", item.status, item.error));
                                    }
                                }
                            }

                            List<BulkResult.BulkResultItem> items = brst.getItems();
                            for (int idx = 0; idx < items.size(); ++idx) {
                                BulkResult.BulkResultItem item = items.get(idx);
                                if (item.error != null && !"".equals(item.error)) {
                                    getTaskPluginCollector().collectDirtyRecord(writerBuffer.get(idx), String.format("status:[%d], error: %s", item.status, item.error));
                                }
                            }
                            return writerBuffer.size() - brst.getFailedItems().size();
                        } else {
                            Integer status = esClient.getStatus(jestResult);
                            if (status == 429) { //TOO_MANY_REQUESTS
                                log.warn("server response too many requests, so auto reduce speed");
                            }
                            throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, jestResult.getErrorMessage());
                        }
                    }
                }, trySize, 60000L, true);
            } catch (Exception e) {
                if (Key.isIgnoreWriteError(this.conf)) {
                    log.warn(String.format("重试[%d]次写入失败,忽略该错误,继续写入!", trySize));
                } else {
                    throw DataXException.asDataXException(ESWriterErrorCode.ES_INDEX_INSERT, e);
                }
            }
            return 0;
        }

        @Override
        public void post() {
        }

        @Override
        public void destroy() {
            esClient.closeJestClient();
        }
    }
}
--------------------------------------------------------------------------------