17 | * 您可以通过这里的实现,将不同的数据输入组件对接进来
18 | *
19 | * 还可以选择使用算法库,其中的算法数据组件是完善的,调用起来很方便 只需要通过 RW.getDT_UDF_Stream方法就可以获取到算法库的资源
20 | *
21 | *
24 | * You can connect different data input components through the implementation here
25 | *
26 | * You can also choose to use the algorithm library. The algorithm data components in it are complete and easy to call. You only need to use the RW.getDT_UDF_Stream method to get the resources of the algorithm library.
27 | *
28 | *
29 | * @param inPath 数据输入路径 data input path
30 | * @return 数据输入组件 data input component
31 | * @throws IOException 将数据输入组件对接到接口的时候,可能会发生一些异常 Some exceptions may occur when connecting the data input component to the interface
32 | */
33 | Reader run(String inPath) throws IOException;
34 | }
35 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/atzhaoPublic/W_UDF.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.atzhaoPublic;
2 |
3 | import java.io.IOException;
4 | import java.io.OutputStream;
5 |
6 | /**
7 | *
8 | */
9 | public interface W_UDF {
10 |
21 | /**
22 | * 您可以通过这里的实现,将不同的数据输出组件对接进来 You can connect different data output components through the implementation here
23 | *
24 | * 还可以选择使用算法库,其中的算法数据组件是完善的,调用起来很方便 只需要通过 RW.getDT_UDF_Stream方法就可以获取到算法库的资源 You can also choose to use the algorithm library; its algorithm data components are complete and easy to call. You only need the RW.getDT_UDF_Stream method to get the resources of the algorithm library
25 | *
26 | * @param outPath 数据输出路径 data output path
27 | * @return 数据输出流或其子类 the data output stream or a subclass of it
28 | * @throws IOException 数据输出组件的对接可能会发生异常 An exception may occur when connecting the data output component
29 | */
30 | OutputStream run(String outPath) throws IOException;
31 | }
32 |
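W_UDF declares a single abstract method, so (assuming it is the plain interface shown above) it can be written as a lambda. A minimal sketch of plugging a local file stream in as a custom data output component; the class name and path below are hypothetical:

import zhao.io.dataTear.atzhaoPublic.W_UDF;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class W_UDF_Example {
    public static void main(String[] args) throws IOException {
        // Plug a plain local file stream in as the data output component.
        W_UDF udf = outPath -> new FileOutputStream(outPath);
        try (OutputStream out = udf.run("/tmp/demo.dt")) {   // hypothetical path
            out.write("hello DataTear".getBytes());
        }
    }
}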
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/config/ConfigBase.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.config;
2 |
3 | import java.util.HashMap;
4 |
5 | /**
6 | * 配置信息库 Configuration repository
7 | */
8 | public class ConfigBase {
9 |
10 | /**
11 | * 配置文件路径 configuration file path
12 | */
13 | public static final String confPath = "conf";
14 | /**
15 | * 插件依赖路径 plugin dependency path
16 | */
17 | public static final String JarsPath = "lib/jars";
18 | /**
19 | * log4j日志文件 log4j log file path
20 | */
21 | public static final String log4j = confPath + "/log4j.properties";
22 | public static HashMap<String, String> conf = new HashMap<>();
23 |
36 | /**
37 | * 数据输入使用的编码集,目前在框架中不会被调用,框架会自动获取文件的字符编码并解析,在拓展框架的时候可能会用到
38 | * The encoding set used for data input, which will not be called in the framework at present. The framework will automatically obtain the character encoding of the file and parse it. It may be used when expanding the framework.
39 | */
40 | public static String Incharset() {
41 | return conf.getOrDefault("In.charset", "utf-8");
42 | }
43 | }
44 |
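A minimal sketch of how the configuration repository might be used, assuming conf is the public static HashMap&lt;String, String&gt; reconstructed above; the override value and class name are hypothetical:

import zhao.io.dataTear.config.ConfigBase;

public class ConfigBaseExample {
    public static void main(String[] args) {
        // Override the input charset, then read it back through the getter.
        ConfigBase.conf.put("In.charset", "GBK");
        System.out.println(ConfigBase.Incharset());   // prints GBK
        System.out.println(ConfigBase.log4j);         // conf/log4j.properties
    }
}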
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/config/ExtClasspathLoader.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.config;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.lang.reflect.Method;
6 | import java.net.URL;
7 | import java.net.URLClassLoader;
8 |
9 | /**
10 | *
11 | */
12 | public class ExtClasspathLoader {
13 |
24 | /**
25 | * @return 初始化是否成功,初始化失败会强制终止方法的执行并返回false
26 | * Whether the initialization is successful or not, the initialization failure will force the execution of the method to terminate and return false
27 | */
28 | public boolean runAllClass(String[] classPath) {
29 | try {
30 | main(classPath);
31 | for (Init_Plug_in plug_in : LoadClass.objectArrayList) {
32 | System.out.println("* >>> 加载插件:" + plug_in.getName());
33 | if (!plug_in.run()) return false;
34 | }
35 | return true;
36 | } catch (Exception e) {
37 | System.err.println("* >?> 发生错误:" + e);
38 | return false;
39 | }
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataContainer/RWData.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataContainer;
2 |
3 | import java.io.Serializable;
4 |
5 | /**
6 | * 数据接口,本文件存储格式的所有数据都应该实现此类
7 | *
8 | * Data interface, all data in this file storage format should implement this class
9 | *
10 | * @param <T> 容器中数据的类型 the type of data in the container
16 | * data to be added to the container
17 | */
18 | @Override
19 | public void putData(String data) {
20 | stringBuilder.append(data);
21 | }
22 |
23 | /**
24 | * @return 从容器中取出来数据
25 | *
26 | * Extract data from container
27 | */
28 | @Override
29 | public String getData() {
30 | return stringBuilder.toString();
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/DataOutputFormat.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp;
2 |
3 | /**
4 | * 数据输出组件类型 Data output component type
5 | */
6 | public enum DataOutputFormat {
7 | UDT, // 自定义的数据输出组件 custom data output component
8 | built_in // 内置的数据输出组件 built-in data output component
9 | }
10 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/DataSourceFormat.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp;
2 |
3 | /**
4 | * 数据输入组件类型 Data input component type
5 | */
6 | public enum DataSourceFormat {
7 | UDT, // 自定义的数据输入组件 custom data input component
8 | built_in // 内置的数据输入组件 built-in data input component
9 | }
10 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/Filter.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW;
2 |
3 | /**
4 | * 过滤接口,用于实现过滤的lambda操作。
5 | *
6 | * Filter interface, used to implement filtering lambda operations.
7 | */
8 | public interface Filter {
9 | /**
10 | * 判断数据是否满足条件的接口。
11 | *
12 | * An interface for judging whether the data meets the conditions.
13 | *
14 | * @param data 被判断数据
15 | *
16 | * judged data.
17 | * @return 判断结果 为true 代表满足条件
18 | *
19 | * The judgment result is true, which means that the condition is met
20 | */
21 | boolean filter(String data);
22 | }
23 |
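Since Filter is documented as a lambda-friendly single-method interface, a short usage sketch; the predicate itself is hypothetical:

import zhao.io.dataTear.dataOp.dataTearRW.Filter;

public class FilterExample {
    public static void main(String[] args) {
        // Keep only lines that are non-null and non-blank.
        Filter nonEmpty = data -> data != null && !data.trim().isEmpty();
        System.out.println(nonEmpty.filter("a\tb"));  // true
        System.out.println(nonEmpty.filter("   "));   // false
    }
}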
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/RW.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW;
2 |
3 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
4 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_builtIn_UDF;
5 | import zhao.io.dataTear.dataOp.dataTearStreams.dbStream.DataBaseStream;
6 | import zhao.io.dataTear.dataOp.dataTearStreams.hdfsStream.*;
7 | import zhao.io.dataTear.dataOp.dataTearStreams.localStream.*;
8 |
9 | import java.io.IOException;
10 | import java.util.regex.Pattern;
11 |
12 | /**
13 | * 数据读写组件接口 是本系统的Reader 与 DTMaster超接口,可以用来对接第三方各种程序 Data read-write component interface; the super interface of this system's Reader and DTMaster, which can be used to connect all kinds of third-party programs
14 | *
15 | * @author 赵凌宇
16 | * @version 1.0
17 | */
18 | public interface RW {
19 |
20 | Pattern LINE_SPLIT = Pattern.compile("\n+");
21 | Pattern EQ = Pattern.compile("\\s+=\\s+");
22 | Pattern PATH_SPLIT = Pattern.compile("&+");
23 |
24 | /**
25 | * 算法库接口 Algorithm library interface
26 | * DT_StreamBase算法库接口的对接者,这里便是提供对应数据算法流的库 The access point of the DT_StreamBase algorithm library interface; this is the library that provides the corresponding data algorithm streams
27 | *
28 | * @param DTfs_streamName 组件类型 使用哪种方式操作DT数据目录 component type; which way to operate the DT data directory
29 | * @return 流 会被系统插入,这里是内置的数据算法流组件 the stream injected by the system; this is the built-in data algorithm stream component
30 | * @see DT_StreamBase
31 | */
32 | static DT_StreamBase getDT_UDF_Stream(DT_builtIn_UDF DTfs_streamName) {
33 | DT_StreamBase DTfs_stream;
34 | switch (DTfs_streamName) {
35 | case LOCAL_TEXT:
36 | DTfs_stream = new LocalBufferStream();
37 | break;
38 | case HDFS_TEXT:
39 | DTfs_stream = new HDFSTextStream();
40 | break;
41 | case HDFS_ZIP:
42 | DTfs_stream = new HDFSZIPStream();
43 | break;
44 | case HDFS_GZIP:
45 | DTfs_stream = new HDFSGZIPStream();
46 | break;
47 | case HDFS_BZIP2:
48 | DTfs_stream = new HDFSBZIP2Stream();
49 | break;
50 | case HDFS_SNAPPY:
51 | DTfs_stream = new HDFSSnappyStream();
52 | break;
53 | case LOCAL_ZIP:
54 | DTfs_stream = new LocalZIPStream();
55 | break;
56 | case LOCAL_GZIP:
57 | DTfs_stream = new LocalGZIPStream();
58 | break;
59 | case LOCAL_BZIP2:
60 | DTfs_stream = new LocalBZIP2Stream();
61 | break;
62 | case LOCAL_SNAPPY:
63 | DTfs_stream = new LocalSnappyStream();
64 | break;
65 | case SQLDB_TEXT:
66 | DTfs_stream = new DataBaseStream();
67 | break;
68 | default: {
69 | Reader.logger.error("系统中没有您需要的组件哦!请重新设置 getDT_UDF_Stream() 的参数。");
70 | return null;
71 | }
72 | }
73 | Reader.logger.info("DT文件存储内置数据算法流库被访问了,分配适用于 " + DTfs_streamName.name() + " 算法的数据流组件: " + DTfs_stream);
74 | return DTfs_stream;
75 | }
76 |
77 |
78 | /**
79 | * @return 是否成功打开数据流 whether the data stream was opened successfully
80 | */
81 | boolean openStream();
82 |
83 | /**
84 | * @return 是否成功对数据进行操作 whether the data operation succeeded
85 | */
86 | boolean op_Data();
87 |
88 | /**
89 | * @return 是否成功关闭数据流 whether the data stream was closed successfully
90 | * @throws IOException 关闭流失败 failed to close the stream
91 | */
92 | boolean closeStream() throws IOException;
93 | }
94 |
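A minimal sketch of fetching a built-in stream component through RW.getDT_UDF_Stream, using the LOCAL_TEXT constant seen in the switch above; what DT_StreamBase offers beyond this lookup is not shown in this section, and the class name is hypothetical:

import zhao.io.dataTear.dataOp.dataTearRW.RW;
import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
import zhao.io.dataTear.dataOp.dataTearStreams.DT_builtIn_UDF;

public class StreamLookupExample {
    public static void main(String[] args) {
        // Ask the built-in algorithm library for the local-text stream component.
        DT_StreamBase stream = RW.getDT_UDF_Stream(DT_builtIn_UDF.LOCAL_TEXT);
        if (stream == null) {
            // getDT_UDF_Stream returns null for unknown component types.
            throw new IllegalStateException("no such built-in stream");
        }
        System.out.println("got stream component: " + stream);
    }
}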
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/Writer.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW;
2 |
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 | import zhao.io.dataTear.atzhaoPublic.Product;
6 |
7 | import java.io.OutputStream;
8 |
9 | /**
10 | * @author 赵凌宇
11 | * 输出组件 通过输出组件的规则将按照DataTear的格式输出 Output component; data is written in DataTear format according to the rules of the output component
12 | * 此接口会被系统内部自动调用,无需实现。 This interface is called automatically inside the system and does not need to be implemented manually.
13 | */
14 | public abstract class Writer extends OutputStream implements Product<Writer> {
15 |
16 | public final static Logger logger = LoggerFactory.getLogger("Writer");
17 |
18 | public abstract String getPath();
19 | }
20 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSReaderBZIP2.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
4 | import org.apache.commons.compress.utils.IOUtils;
5 | import org.apache.hadoop.fs.FileSystem;
6 | import org.apache.hadoop.fs.Path;
7 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
8 |
9 | import java.io.ByteArrayInputStream;
10 | import java.io.ByteArrayOutputStream;
11 | import java.io.IOException;
12 |
13 | /**
14 | * 在HDFS中使用BZIP2算法进行数据解码读取的组件,对应BZIP2数据的数据输入
15 | *
16 | * The HDFS data reading component uses the BZIP2 algorithm for data decoding and reading, which corresponds to the data input of BZIP2 data.
17 | *
18 | * @see zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterBZIP2
19 | */
20 | public class HDFSReaderBZIP2 extends Reader {
21 | protected final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
22 | Path In_path;
23 | String In_PathStr;
24 |
25 | public HDFSReaderBZIP2(FileSystem fileSystem, Path in_path, String in_PathStr) {
26 | try {
27 | setInputStream(fileSystem.open(in_path));
28 | In_path = in_path;
29 | In_PathStr = in_PathStr;
30 | } catch (IOException e) {
31 | logger.error("组件:" + this.getClass().getName() + " 启动数据流时出现异常!目标数据:" + in_PathStr + ",错误原因:" + e);
32 | e.printStackTrace(System.err);
33 | }
34 | }
35 |
36 | public static HDFSReaderBZIP2Builder builder() {
37 | return new HDFSReaderBZIP2Builder();
38 | }
39 |
40 | @Override
41 | public boolean closeStream() throws IOException {
42 | return super.closeStream();
43 | }
44 |
45 | @Override
46 | public boolean openStream() {
47 | return getInputStream() != null;
48 | }
49 |
50 | @Override
51 | public int read() throws IOException {
52 | op_Data();
53 | return -1;
54 | }
55 |
56 | @Override
57 | public boolean op_Data() {
58 | try {
59 | IOUtils.copy(getInputStream(), byteArrayOutputStream);
60 | return true;
61 | } catch (IOException e) {
62 | return false;
63 | } finally {
64 | try {
65 | byteArrayOutputStream.flush();
66 | } catch (IOException e) {
67 | e.printStackTrace();
68 | }
69 | try {
70 | byteArrayOutputStream.close();
71 | } catch (IOException e) {
72 | e.printStackTrace();
73 | }
74 | }
75 | }
76 |
77 | /**
78 | * @param data 需要解压的数组 Array to be decompressed
79 | * @return 解压之后的数组 Array after decompression
80 | */
81 | private byte[] unBZIP2(byte[] data) {
82 | ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(data);
83 | ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
84 | try {
85 | BZip2CompressorInputStream bZip2CompressorInputStream = new BZip2CompressorInputStream(byteArrayInputStream);
86 | IOUtils.copy(bZip2CompressorInputStream, byteArrayOutputStream);
87 | } catch (IOException e) {
88 | e.printStackTrace();
89 | }
90 | return byteArrayOutputStream.toString().trim().getBytes();
91 | }
92 |
93 | public int available() throws IOException {
94 | return getInputStream().available();
95 | }
96 |
97 | @Override
98 | public byte[] getDataArray() {
99 | setByteArray(unBZIP2(byteArrayOutputStream.toByteArray()));
100 | return super.getDataArray();
101 | }
102 |
103 | public void close() {
104 | try {
105 | closeStream();
106 | } catch (IOException e) {
107 | e.printStackTrace();
108 | }
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSReaderBZIP2Builder.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.atzhaoPublic.Builder;
7 | import zhao.io.dataTear.atzhaoPublic.Priority;
8 |
9 | import java.io.IOException;
10 |
11 | public class HDFSReaderBZIP2Builder implements Builder<HDFSReaderBZIP2> {
12 |
13 | private final Configuration configuration = new Configuration();
14 | private FileSystem fileSystem;
15 | private Path pathObject;
16 | private String pathString;
17 |
18 | /**
19 | * 定位HDFS信息
20 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法,它会通过IP和端口寻找HDFS集群
21 | * Locate HDFS information
22 | * If FileSystem has not been set, this setting will take effect, that is to say, it is an alternate method, it will find the HDFS cluster by IP and port
23 | *
24 | * @param IP HDFS集群通讯地址 一般是主NameNode信息
25 | *
26 | * cluster communication address is generally the main NameNode information
27 | * @param port 通讯端口
28 | * communication port
29 | * @return 链
30 | */
31 | @Priority("2")
32 | public HDFSReaderBZIP2Builder setIP_port(String IP, String port) {
33 | configuration.set("fs.default.name", "hdfs://" + IP + ":" + port);
34 | return this;
35 | }
36 |
37 | /**
38 | * 定制更多配置信息
39 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法
40 | *
41 | * Customize more configuration information If no FileSystem settings have been made, this setting will take effect, which means it is an alternate method
42 | *
43 | * @param key HDFS配置名称
44 | * @param value 配置参数
45 | * @return 链
46 | */
47 | @Priority("2")
48 | public HDFSReaderBZIP2Builder setKV(String key, String value) {
49 | configuration.set(key, value);
50 | return this;
51 | }
52 |
53 | /**
54 | * 定位输入路径 该方法不一定会被调用,因为针对文件输入路径的设置由DataTear去实现
55 | *
56 | * Locate the input path This method is not necessarily called, because the settings for the file input path are implemented by DataTear
57 | *
58 | * @param pathString 设置文件输入路径 set file path
59 | * @return 链
60 | */
61 | @Priority("3")
62 | public HDFSReaderBZIP2Builder setPathString(String pathString) {
63 | this.pathString = pathString;
64 | return this;
65 | }
66 |
67 | /**
68 | * 定位输入路径 该方法不一定会被调用,因为针对文件输入路径的设置由DataTear去实现
69 | *
70 | * Locate the input path This method is not necessarily called, because the settings for the file input path are implemented by DataTear
71 | *
72 | * @param pathObject 设置文件路径对象 set file path
73 | * @return 链
74 | */
75 | @Priority("2")
76 | public HDFSReaderBZIP2Builder setPathObject(Path pathObject) {
77 | this.pathObject = pathObject;
78 | return this;
79 | }
80 |
81 | /**
82 | * 直接通过FileSystem对象构建输入组件,这个是非常推荐的方法
83 | *
84 | * Build the input component directly from the File System object, this is a very recommended method
85 | *
86 | * @param fileSystem HDFS file System
87 | * @return 链
88 | */
89 | @Priority("1")
90 | public HDFSReaderBZIP2Builder setFileSystem(FileSystem fileSystem) {
91 | this.fileSystem = fileSystem;
92 | return this;
93 | }
94 |
95 | @Override
96 | public HDFSReaderBZIP2 create() {
97 | try {
98 | if (fileSystem == null) fileSystem = FileSystem.get(configuration);
99 | if (pathObject == null) pathObject = new Path(pathString);
100 | return new HDFSReaderBZIP2(fileSystem, pathObject, pathString);
101 | } catch (IOException e) {
102 | e.printStackTrace();
103 | return null;
104 | }
105 | }
106 | }
107 |
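A minimal usage sketch of this builder, following the @Priority hints above (handing over a ready FileSystem is the preferred, priority-1 route); the HDFS path and class name are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSReaderBZIP2;

public class ReaderBuilderExample {
    public static void main(String[] args) throws Exception {
        // Preferred (priority 1): hand the builder a ready-made FileSystem.
        FileSystem fs = FileSystem.get(new Configuration());
        HDFSReaderBZIP2 reader = HDFSReaderBZIP2.builder()
                .setFileSystem(fs)
                .setPathString("/data/demo/NameManager.NDT")   // hypothetical path
                .create();
        if (reader != null && reader.openStream()) {
            reader.read();                       // triggers op_Data() internally
            byte[] decoded = reader.getDataArray();  // BZIP2-decompressed bytes
            System.out.println(new String(decoded));
            reader.close();
        }
    }
}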
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSReaderGZIP.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.commons.compress.utils.IOUtils;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
7 |
8 | import java.io.ByteArrayInputStream;
9 | import java.io.ByteArrayOutputStream;
10 | import java.io.IOException;
11 | import java.util.zip.GZIPInputStream;
12 |
13 | /**
14 | * 在HDFS中使用GZIP算法进行数据解码读取的组件,通过此组件您可以直接读取HDFS中的GZIP数据
15 | *
16 | * A component that uses the GZIP algorithm to decode and read data in HDFS. With this component, you can directly read GZIP data in HDFS.
17 | */
18 | public class HDFSReaderGZIP extends Reader {
19 | protected final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
20 | protected Path In_path;
21 | protected String In_PathStr;
22 |
23 | public HDFSReaderGZIP(FileSystem fileSystem, Path in_path, String in_PathStr) {
24 | try {
25 | setInputStream(fileSystem.open(in_path));
26 | this.In_path = in_path;
27 | this.In_PathStr = in_PathStr;
28 | } catch (IOException e) {
29 | logger.error("组件:" + this.getClass().getName() + " 启动数据流时出现异常!目标数据:" + in_PathStr + ",错误原因:" + e);
30 | e.printStackTrace(System.err);
31 | }
32 | }
33 |
34 | public static HDFSReaderGZIPBuilder builder() {
35 | return new HDFSReaderGZIPBuilder();
36 | }
37 |
38 | @Override
39 | public boolean closeStream() throws IOException {
40 | return super.closeStream();
41 | }
42 |
43 | @Override
44 | public boolean openStream() {
45 | return getInputStream() != null;
46 | }
47 |
48 | @Override
49 | public int read() throws IOException {
50 | op_Data();
51 | return -1;
52 | }
53 |
54 | @Override
55 | public boolean op_Data() {
56 | try {
57 | IOUtils.copy(getInputStream(), byteArrayOutputStream);
58 | return true;
59 | } catch (IOException e) {
60 | return false;
61 | } finally {
62 | try {
63 | byteArrayOutputStream.flush();
64 | } catch (IOException e) {
65 | e.printStackTrace();
66 | }
67 | try {
68 | byteArrayOutputStream.close();
69 | } catch (IOException e) {
70 | e.printStackTrace();
71 | }
72 | }
73 | }
74 |
75 | /**
76 | * @param data 需要解压的数组 Array to be decompressed
77 | * @return 解压之后的数组 Array after decompression
78 | */
79 | private byte[] unGZip(byte[] data) {
80 | ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
81 | try {
82 | ByteArrayInputStream bis = new ByteArrayInputStream(data);
83 | GZIPInputStream gzipInputStream = new GZIPInputStream(bis);
84 | IOUtils.copy(gzipInputStream, byteArrayOutputStream);
85 | gzipInputStream.close();
86 | bis.close();
87 | } catch (Exception ex) {
88 | ex.printStackTrace();
89 | } finally {
90 | try {
91 | byteArrayOutputStream.flush();
92 | } catch (IOException e) {
93 | e.printStackTrace();
94 | }
95 | try {
96 | byteArrayOutputStream.close();
97 | } catch (IOException e) {
98 | e.printStackTrace();
99 | }
100 | }
101 | return byteArrayOutputStream.toString().trim().getBytes();
102 | }
103 |
104 | public int available() throws IOException {
105 | return getInputStream().available();
106 | }
107 |
108 | @Override
109 | public byte[] getDataArray() {
110 | setByteArray(unGZip(byteArrayOutputStream.toByteArray()));
111 | return super.getDataArray();
112 | }
113 |
114 | public void close() {
115 | try {
116 | closeStream();
117 | } catch (IOException e) {
118 | e.printStackTrace();
119 | }
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSReaderGZIPBuilder.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.atzhaoPublic.Builder;
7 | import zhao.io.dataTear.atzhaoPublic.Priority;
8 |
9 | import java.io.IOException;
10 |
11 | public class HDFSReaderGZIPBuilder implements Builder<HDFSReaderGZIP> {
12 |
13 | private final Configuration configuration = new Configuration();
14 | private FileSystem fileSystem;
15 | private Path pathObject;
16 | private String pathString;
17 |
18 | /**
19 | * 定位HDFS信息
20 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法,它会通过IP和端口寻找HDFS集群
21 | * Locate HDFS information
22 | * If FileSystem has not been set, this setting will take effect, that is to say, it is an alternate method, it will find the HDFS cluster by IP and port
23 | *
24 | * @param IP HDFS集群通讯地址 一般是主NameNode信息
25 | *
26 | * cluster communication address is generally the main NameNode information
27 | * @param port 通讯端口
28 | * communication port
29 | * @return 链
30 | */
31 | @Priority("2")
32 | public HDFSReaderGZIPBuilder setIP_port(String IP, String port) {
33 | configuration.set("fs.default.name", "hdfs://" + IP + ":" + port);
34 | return this;
35 | }
36 |
37 | /**
38 | * 定制更多配置信息
39 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法
40 | *
41 | * Customize more configuration information If no FileSystem settings have been made, this setting will take effect, which means it is an alternate method
42 | *
43 | * @param key HDFS配置名称
44 | * @param value 配置参数
45 | * @return 链
46 | */
47 | @Priority("2")
48 | public HDFSReaderGZIPBuilder setKV(String key, String value) {
49 | configuration.set(key, value);
50 | return this;
51 | }
52 |
53 | /**
54 | * 定位输入路径 该方法不一定会被调用,因为针对文件输入路径的设置由DataTear去实现
55 | *
56 | * Locate the input path This method is not necessarily called, because the settings for the file input path are implemented by DataTear
57 | *
58 | * @param pathString 设置文件输入路径 set file path
59 | * @return 链
60 | */
61 | @Priority("3")
62 | public HDFSReaderGZIPBuilder setPathString(String pathString) {
63 | this.pathString = pathString;
64 | return this;
65 | }
66 |
67 | /**
68 | * 定位输入路径 该方法不一定会被调用,因为针对文件输入路径的设置由DataTear去实现
69 | *
70 | * Locate the input path This method is not necessarily called, because the settings for the file input path are implemented by DataTear
71 | *
72 | * @param pathObject 设置文件路径对象 set file path
73 | * @return 链
74 | */
75 | @Priority("2")
76 | public HDFSReaderGZIPBuilder setPathObject(Path pathObject) {
77 | this.pathObject = pathObject;
78 | return this;
79 | }
80 |
81 | /**
82 | * 直接通过FileSystem对象构建输入组件,这个是非常推荐的方法
83 | *
84 | * Build the input component directly from the File System object, this is a very recommended method
85 | *
86 | * @param fileSystem HDFS file System
87 | * @return 链
88 | */
89 | @Priority("1")
90 | public HDFSReaderGZIPBuilder setFileSystem(FileSystem fileSystem) {
91 | this.fileSystem = fileSystem;
92 | return this;
93 | }
94 |
95 | @Override
96 | public HDFSReaderGZIP create() {
97 | try {
98 | if (fileSystem == null) fileSystem = FileSystem.get(configuration);
99 | if (pathObject == null) pathObject = new Path(pathString);
100 | return new HDFSReaderGZIP(fileSystem, pathObject, pathString);
101 | } catch (IOException e) {
102 | e.printStackTrace();
103 | return null;
104 | }
105 | }
106 | }
107 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSReaderSnappy.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.commons.compress.utils.IOUtils;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import org.xerial.snappy.SnappyInputStream;
7 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
8 |
9 | import java.io.ByteArrayInputStream;
10 | import java.io.ByteArrayOutputStream;
11 | import java.io.IOException;
12 |
13 | /**
14 | * Snappy算法的输入流组件,您可以通过这个组件读取HDFS中的Snappy数据
15 | *
16 | * The input stream component of the Snappy algorithm, you can read the Snappy data in HDFS through this component.
17 | */
18 | public class HDFSReaderSnappy extends Reader {
19 | protected final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
20 | protected Path In_path;
21 | protected String In_PathStr;
22 |
23 | public HDFSReaderSnappy(FileSystem fileSystem, Path in_path, String in_PathStr) {
24 | try {
25 | setInputStream(fileSystem.open(in_path));
26 | In_path = in_path;
27 | In_PathStr = in_PathStr;
28 | } catch (IOException e) {
29 | logger.error("组件:" + this.getClass().getName() + " 启动数据流时出现异常!目标数据:" + in_PathStr + ",错误原因:" + e);
30 | e.printStackTrace(System.err);
31 | }
32 | }
33 |
34 | public static HDFSReaderSnappyBuilder builder() {
35 | return new HDFSReaderSnappyBuilder();
36 | }
37 |
38 | @Override
39 | public boolean closeStream() throws IOException {
40 | return super.closeStream();
41 | }
42 |
43 | @Override
44 | public boolean openStream() {
45 | return getInputStream() != null;
46 | }
47 |
48 | @Override
49 | public int read() throws IOException {
50 | op_Data();
51 | return -1;
52 | }
53 |
54 | @Override
55 | public boolean op_Data() {
56 | try {
57 | IOUtils.copy(getInputStream(), byteArrayOutputStream);
58 | return true;
59 | } catch (IOException e) {
60 | return false;
61 | } finally {
62 | try {
63 | byteArrayOutputStream.flush();
64 | } catch (IOException e) {
65 | e.printStackTrace();
66 | }
67 | try {
68 | byteArrayOutputStream.close();
69 | } catch (IOException e) {
70 | e.printStackTrace();
71 | }
72 | }
73 | }
74 |
75 | /**
76 | * @param data 需要解压的数组 Array to be decompressed
77 | * @return 解压之后的数组 Array after decompression
78 | */
79 | private byte[] unSnappy(byte[] data) {
80 | ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(data);
81 | ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
82 | try {
83 | SnappyInputStream framedSnappyCompressorInputStream = new SnappyInputStream(byteArrayInputStream);
84 | IOUtils.copy(framedSnappyCompressorInputStream, byteArrayOutputStream);
85 | } catch (IOException e) {
86 | e.printStackTrace();
87 | }
88 | return byteArrayOutputStream.toString().trim().getBytes();
89 | }
90 |
91 | public int available() throws IOException {
92 | return getInputStream().available();
93 | }
94 |
95 | @Override
96 | public byte[] getDataArray() {
97 | setByteArray(unSnappy(byteArrayOutputStream.toByteArray()));
98 | return super.getDataArray();
99 | }
100 |
101 | public void close() {
102 | try {
103 | closeStream();
104 | } catch (IOException e) {
105 | e.printStackTrace();
106 | }
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSReaderSnappyBuilder.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.atzhaoPublic.Builder;
7 | import zhao.io.dataTear.atzhaoPublic.Priority;
8 |
9 | import java.io.IOException;
10 |
11 | public class HDFSReaderSnappyBuilder implements Builder<HDFSReaderSnappy> {
12 |
13 | private final Configuration configuration = new Configuration();
14 | private FileSystem fileSystem;
15 | private Path pathObject;
16 | private String pathString;
17 |
18 | /**
19 | * 定位HDFS信息
20 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法,它会通过IP和端口寻找HDFS集群
21 | * Locate HDFS information
22 | * If FileSystem has not been set, this setting will take effect, that is to say, it is an alternate method, it will find the HDFS cluster by IP and port
23 | *
24 | * @param IP HDFS集群通讯地址 一般是主NameNode信息
25 | *
26 | * cluster communication address is generally the main NameNode information
27 | * @param port 通讯端口
28 | * communication port
29 | * @return 链
30 | */
31 | @Priority("2")
32 | public HDFSReaderSnappyBuilder setIP_port(String IP, String port) {
33 | configuration.set("fs.default.name", "hdfs://" + IP + ":" + port);
34 | return this;
35 | }
36 |
37 | /**
38 | * 定制更多配置信息
39 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法
40 | *
41 | * Customize more configuration information If no FileSystem settings have been made, this setting will take effect, which means it is an alternate method
42 | *
43 | * @param key HDFS配置名称
44 | * @param value 配置参数
45 | * @return 链
46 | */
47 | @Priority("2")
48 | public HDFSReaderSnappyBuilder setKV(String key, String value) {
49 | configuration.set(key, value);
50 | return this;
51 | }
52 |
53 | /**
54 | * 定位输入路径 该方法不一定会被调用,因为针对文件输入路径的设置由DataTear去实现
55 | *
56 | * Locate the input path This method is not necessarily called, because the settings for the file input path are implemented by DataTear
57 | *
58 | * @param pathString 设置文件输入路径 set file path
59 | * @return 链
60 | */
61 | @Priority("3")
62 | public HDFSReaderSnappyBuilder setPathString(String pathString) {
63 | this.pathString = pathString;
64 | return this;
65 | }
66 |
67 | /**
68 | * 定位输入路径 该方法不一定会被调用,因为针对文件输入路径的设置由DataTear去实现
69 | *
70 | * Locate the input path This method is not necessarily called, because the settings for the file input path are implemented by DataTear
71 | *
72 | * @param pathObject 设置文件路径对象 set file path
73 | * @return 链
74 | */
75 | @Priority("2")
76 | public HDFSReaderSnappyBuilder setPathObject(Path pathObject) {
77 | this.pathObject = pathObject;
78 | return this;
79 | }
80 |
81 | /**
82 | * 直接通过FileSystem对象构建输入组件,这个是非常推荐的方法
83 | *
84 | * Build the input component directly from the File System object, this is a very recommended method
85 | *
86 | * @param fileSystem HDFS file System
87 | * @return 链
88 | */
89 | @Priority("1")
90 | public HDFSReaderSnappyBuilder setFileSystem(FileSystem fileSystem) {
91 | this.fileSystem = fileSystem;
92 | return this;
93 | }
94 |
95 | @Override
96 | public HDFSReaderSnappy create() {
97 | try {
98 | if (fileSystem == null) fileSystem = FileSystem.get(configuration);
99 | if (pathObject == null) pathObject = new Path(pathString);
100 | return new HDFSReaderSnappy(fileSystem, pathObject, pathString);
101 | } catch (IOException e) {
102 | e.printStackTrace();
103 | return null;
104 | }
105 | }
106 | }
107 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSReaderZIP.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.commons.compress.utils.IOUtils;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
7 |
8 | import java.io.ByteArrayInputStream;
9 | import java.io.ByteArrayOutputStream;
10 | import java.io.IOException;
11 | import java.util.zip.ZipInputStream;
12 |
13 | public class HDFSReaderZIP extends Reader {
14 | protected final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
15 | Path In_path;
16 | String In_PathStr;
17 |
18 | public HDFSReaderZIP(FileSystem fileSystem, Path in_path, String in_PathStr, String charset) {
19 | try {
20 | setInputStream(fileSystem.open(in_path));
21 | In_path = in_path;
22 | In_PathStr = in_PathStr;
23 | } catch (IOException e) {
24 | logger.error("组件:" + this.getClass().getName() + " 启动数据流时出现异常!目标数据:" + in_PathStr + ",错误原因:" + e);
25 | e.printStackTrace(System.err);
26 | }
27 | }
28 |
29 | public static HDFSReaderZIPBuilder builder() {
30 | return new HDFSReaderZIPBuilder();
31 | }
32 |
33 | @Override
34 | public boolean closeStream() throws IOException {
35 | return super.closeStream();
36 | }
37 |
38 | @Override
39 | public boolean openStream() {
40 | return getInputStream() != null;
41 | }
42 |
43 | @Override
44 | public int read() throws IOException {
45 | op_Data();
46 | return -1;
47 | }
48 |
49 | @Override
50 | public boolean op_Data() {
51 | try {
52 | IOUtils.copy(getInputStream(), byteArrayOutputStream);
53 | return true;
54 | } catch (IOException e) {
55 | return false;
56 | } finally {
57 | try {
58 | byteArrayOutputStream.flush();
59 | } catch (IOException e) {
60 | e.printStackTrace();
61 | }
62 | try {
63 | byteArrayOutputStream.close();
64 | } catch (IOException e) {
65 | e.printStackTrace();
66 | }
67 | }
68 | }
69 |
70 | /**
71 | * @param data 需要解压的数组 Array to be decompressed
72 | * @return 解压之后的数组 Array after decompression
73 | */
74 | private byte[] unZip(byte[] data) {
75 | byte[] b = null;
76 | try {
77 | ByteArrayInputStream bis = new ByteArrayInputStream(data);
78 | ZipInputStream zip = new ZipInputStream(bis);
79 | if (zip.getNextEntry() != null) {
80 | ByteArrayOutputStream baos = new ByteArrayOutputStream();
81 | IOUtils.copy(zip, baos);
82 | b = baos.toByteArray();
83 | baos.flush();
84 | baos.close();
85 | }
86 | zip.close();
87 | bis.close();
88 | } catch (Exception ex) {
89 | ex.printStackTrace();
90 | }
91 | return b;
92 | }
93 |
94 | public int available() throws IOException {
95 | return getInputStream().available();
96 | }
97 |
98 | @Override
99 | public byte[] getDataArray() {
100 | setByteArray(unZip(byteArrayOutputStream.toByteArray()));
101 | return super.getDataArray();
102 | }
103 |
104 | public void close() {
105 | try {
106 | closeStream();
107 | } catch (IOException e) {
108 | e.printStackTrace();
109 | }
110 | }
111 | }
112 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriter.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.fs.FSDataOutputStream;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.dataOp.dataTearRW.Writer;
7 |
8 | import java.io.IOException;
9 | import java.nio.charset.Charset;
10 |
11 | /**
12 | * 向HDFS分布式集群中写数据的组件
13 | * 该组件将会被DTMaster调用,按照DT的格式输出数据
14 | *
15 | * The component that writes data to the HDFS distributed cluster This component will be called by DTMaster and output data in DT format
16 | */
17 | public class HDFSWriter extends Writer {
18 |
19 | protected final FSDataOutputStream fsDataOutputStream;
20 | Path Out_path;
21 | String Out_Pathstr;
22 | private Charset charset;
23 |
24 | /**
25 | * 由建造者进行构造函数的执行,获取到这个类
26 | *
27 | * The constructor executes the constructor to obtain this class
28 | *
29 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
30 | *
31 | * HDFS object, through which the HDFS cluster address will be obtained
32 | * @param path 数据输出路径 data output path
33 | * @param charset 数据输出编码集 Data output code set
34 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
35 | *
36 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
37 | */
38 | public HDFSWriter(FileSystem fileSystem, Path path, Charset charset) throws IOException {
39 | this.fsDataOutputStream = fileSystem.create(path);
40 | Out_path = path;
41 | Out_Pathstr = Out_path.toString();
42 | this.charset = charset;
43 | }
44 |
45 | /**
46 | * 使用默认字符集构造函数 Use default charset constructor
47 | *
48 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
49 | *
50 | * HDFS object, through which the HDFS cluster address will be obtained
51 | * @param path 数据输出路径 data output path
52 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
53 | *
54 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
55 | */
56 | public HDFSWriter(FileSystem fileSystem, Path path) throws IOException {
57 | this.fsDataOutputStream = fileSystem.create(path);
58 | Out_path = path;
59 | Out_Pathstr = Out_path.toString();
60 | }
61 |
62 | /**
63 | * @return HDFS的DataTear文件输出组件的建造者对象
64 | *
65 | * Builder object for HDFS's Data Tear file output component
66 | */
67 | public static HDFSWriterBuilder builder() {
68 | return new HDFSWriterBuilder();
69 | }
70 |
71 | @Override
72 | public void write(byte[] b) throws IOException {
73 | fsDataOutputStream.write(b);
74 | }
75 |
76 | @Override
77 | public void write(byte[] b, int off, int len) throws IOException {
78 | fsDataOutputStream.write(b, off, len);
79 | }
80 |
81 | @Override
82 | public void flush() throws IOException {
83 | fsDataOutputStream.flush();
84 | }
85 |
86 | @Override
87 | public void close() throws IOException {
88 | fsDataOutputStream.close();
89 | }
90 |
91 | @Override
92 | public void write(int b) throws IOException {
93 | fsDataOutputStream.write(b);
94 | }
95 |
96 | @Override
97 | public String getPath() {
98 | return Out_Pathstr;
99 | }
100 |
101 | @Override
102 | public Writer toToObject() {
103 | return this;
104 | }
105 | }
106 |
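A minimal sketch of writing through HDFSWriter via its builder; the output path and class name are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriter;

public class WriterBuilderExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        HDFSWriter writer = HDFSWriter.builder()
                .setFileSystem(fs)                   // preferred, priority 1
                .setPathString("/data/demo/out.dt")  // hypothetical path
                .create();
        if (writer != null) {
            writer.write("hello DataTear".getBytes());
            writer.flush();
            writer.close();
        }
    }
}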
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterBZIP2.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
4 | import org.apache.hadoop.fs.FSDataOutputStream;
5 | import org.apache.hadoop.fs.FileSystem;
6 | import org.apache.hadoop.fs.Path;
7 | import zhao.io.dataTear.dataOp.dataTearRW.Writer;
8 |
9 | import java.io.ByteArrayOutputStream;
10 | import java.io.IOException;
11 | import java.nio.charset.Charset;
12 |
13 | /**
14 | * @author 赵凌宇
15 | * HDFS中构建BZIP2_DT目录的数据输出组件
16 | *
17 | * Data output component for building BZIP2_DT directory in HDFS
18 | */
19 | public class HDFSWriterBZIP2 extends Writer {
20 |
21 | protected final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
22 | protected final ByteArrayOutputStream OKYA = new ByteArrayOutputStream();
23 | protected FSDataOutputStream fsDataOutputStream;
24 | Path Out_path;
25 | String Out_PathStr;
26 | private Charset charset;
27 |
28 | /**
29 | * 由建造者进行构造函数的执行,获取到这个类
30 | *
31 | * The constructor executes the constructor to obtain this class
32 | *
33 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
34 | *
35 | * HDFS object, through which the HDFS cluster address will be obtained
36 | * @param path 数据输出路径 data output path
37 | * @param charset 数据输出编码集 Data output code set
38 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
39 | *
40 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
41 | */
42 | public HDFSWriterBZIP2(FileSystem fileSystem, Path path, Charset charset) throws IOException {
43 | this.fsDataOutputStream = fileSystem.create(path);
44 | Out_path = path;
45 | Out_PathStr = Out_path.toString();
46 | this.charset = charset;
47 | }
48 |
49 | /**
50 | * 使用默认字符集构造函数 Use default charset constructor
51 | *
52 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
53 | *
54 | * HDFS object, through which the HDFS cluster address will be obtained
55 | * @param path 数据输出路径 data output path
56 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
57 | *
58 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
59 | */
60 | public HDFSWriterBZIP2(FileSystem fileSystem, Path path) throws IOException {
61 | this.fsDataOutputStream = fileSystem.create(path);
62 | Out_path = path;
63 | Out_PathStr = Out_path.toString();
64 | }
65 |
66 |
67 | public static HDFSWriterBZIP2Builder builder() {
68 | return new HDFSWriterBZIP2Builder();
69 | }
70 |
71 | @Override
72 | public void write(byte[] b) throws IOException {
73 | byteArrayOutputStream.write(b);
74 | }
75 |
76 | @Override
77 | public void write(byte[] b, int off, int len) throws IOException {
78 | byteArrayOutputStream.write(b, off, len);
79 | }
80 |
81 | @Override
82 | public void flush() throws IOException {
83 | byteArrayOutputStream.flush();
84 | BZip2CompressorOutputStream bzip2OutputStream = new BZip2CompressorOutputStream(OKYA);
85 | bzip2OutputStream.write(byteArrayOutputStream.toByteArray());
86 | bzip2OutputStream.flush();
87 | bzip2OutputStream.close();
88 | fsDataOutputStream.write(OKYA.toByteArray());
89 | }
90 |
91 | @Override
92 | public void close() throws IOException {
93 | OKYA.flush();
94 | OKYA.close();
95 | byteArrayOutputStream.close();
96 | fsDataOutputStream.flush();
97 | fsDataOutputStream.close();
98 | }
99 |
100 | @Override
101 | public String getPath() {
102 | return this.Out_PathStr;
103 | }
104 |
105 | @Override
106 | public void write(int b) throws IOException {
107 | byteArrayOutputStream.write(b);
108 | }
109 |
110 | @Override
111 | public Writer toToObject() {
112 | return this;
113 | }
114 | }
115 |
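Note that this writer buffers everything in memory and only BZIP2-compresses and pushes the buffer to HDFS inside flush(), so flush() should be called exactly once before close(). A minimal sketch with a hypothetical path and class name:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterBZIP2;

public class BZIP2WriterExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        HDFSWriterBZIP2 writer = HDFSWriterBZIP2.builder()
                .setFileSystem(fs)
                .setPathString("/data/demo/out.bzip2.dt")   // hypothetical path
                .create();
        if (writer != null) {
            writer.write("hello DataTear".getBytes());
            // flush() is where the whole buffer is BZIP2-compressed and written
            // to HDFS, so call it exactly once before close().
            writer.flush();
            writer.close();
        }
    }
}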
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterBZIP2Builder.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.atzhaoPublic.Builder;
7 | import zhao.io.dataTear.atzhaoPublic.Priority;
8 |
9 | import java.io.IOException;
10 |
11 | /**
12 | * HDFS数据输出组件建造者 HDFS data output component builder
13 | */
14 | public class HDFSWriterBZIP2Builder implements Builder<HDFSWriterBZIP2> {
15 |
16 | private final Configuration configuration = new Configuration();
17 | private FileSystem fileSystem;
18 | private Path pathObject;
19 | private String pathString;
20 |
21 | /**
22 | * 定位HDFS信息
23 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法,它会通过IP和端口寻找HDFS集群
24 | * Locate HDFS information
25 | * If FileSystem has not been set, this setting will take effect, that is to say, it is an alternate method, it will find the HDFS cluster by IP and port
26 | *
27 | * @param IP HDFS集群通讯地址 一般是主NameNode信息
28 | *
29 | * cluster communication address is generally the main NameNode information
30 | * @param port 通讯端口
31 | * communication port
32 | * @return 链
33 | */
34 | @Priority("2")
35 | public HDFSWriterBZIP2Builder setIP_port(String IP, String port) {
36 | configuration.set("fs.default.name", "hdfs://" + IP + ":" + port);
37 | return this;
38 | }
39 |
40 | /**
41 | * 定制更多配置信息
42 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法
43 | *
44 | * Customize more configuration information If no FileSystem settings have been made, this setting will take effect, which means it is an alternate method
45 | *
46 | * @param key HDFS配置名称
47 | * @param value 配置参数
48 | * @return 链
49 | */
50 | @Priority("2")
51 | public HDFSWriterBZIP2Builder setKV(String key, String value) {
52 | configuration.set(key, value);
53 | return this;
54 | }
55 |
56 | /**
57 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
58 | *
59 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
60 | *
61 | * @param pathString 设置文件输出路径 set file path
62 | * @return 链
63 | */
64 | @Priority("3")
65 | public HDFSWriterBZIP2Builder setPathString(String pathString) {
66 | this.pathString = pathString;
67 | return this;
68 | }
69 |
70 | /**
71 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
72 | *
73 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
74 | *
75 | * @param pathObject 设置文件路径对象 set file path
76 | * @return 链
77 | */
78 | @Priority("2")
79 | public HDFSWriterBZIP2Builder setPathObject(Path pathObject) {
80 | this.pathObject = pathObject;
81 | return this;
82 | }
83 |
84 | /**
85 | * 直接通过FileSystem对象构建输出组件,这个是非常推荐的方法
86 | *
87 | * Build the output component directly from the File System object, this is a very recommended method
88 | *
89 | * @param fileSystem HDFS file System
90 | * @return 链
91 | */
92 | @Priority("1")
93 | public HDFSWriterBZIP2Builder setFileSystem(FileSystem fileSystem) {
94 | this.fileSystem = fileSystem;
95 | return this;
96 | }
97 |
98 | @Override
99 | public HDFSWriterBZIP2 create() {
100 | try {
101 | if (pathObject == null) pathObject = new Path(pathString);
102 | return new HDFSWriterBZIP2(fileSystem == null ? FileSystem.get(configuration) : fileSystem, pathObject);
103 | } catch (IOException e) {
104 | e.printStackTrace();
105 | return null;
106 | }
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterBuilder.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.atzhaoPublic.Builder;
7 | import zhao.io.dataTear.atzhaoPublic.Priority;
8 |
9 | import java.io.IOException;
10 |
11 | /**
12 | * HDFS数据输出组件建造者 HDFS data output component builder
13 | */
14 | public class HDFSWriterBuilder implements Builder<HDFSWriter> {
15 |
16 | private final Configuration configuration = new Configuration();
17 | private FileSystem fileSystem;
18 | private Path pathObject;
19 | private String pathString;
20 |
21 | /**
22 | * 定位HDFS信息
23 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法,它会通过IP和端口寻找HDFS集群
24 | * Locate HDFS information
25 | * If FileSystem has not been set, this setting will take effect, that is to say, it is an alternate method, it will find the HDFS cluster by IP and port
26 | *
27 | * @param IP HDFS集群通讯地址 一般是主NameNode信息
28 | *
29 | * cluster communication address is generally the main NameNode information
30 | * @param port 通讯端口
31 | * communication port
32 | * @return 链
33 | */
34 | @Priority("2")
35 | public HDFSWriterBuilder setIP_port(String IP, String port) {
36 | configuration.set("fs.default.name", "hdfs://" + IP + ":" + port);
37 | return this;
38 | }
39 |
40 | /**
41 | * 定制更多配置信息
42 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法
43 | *
44 | * Customize more configuration information If no FileSystem settings have been made, this setting will take effect, which means it is an alternate method
45 | *
46 | * @param key HDFS配置名称
47 | * @param value 配置参数
48 | * @return 链
49 | */
50 | @Priority("2")
51 | public HDFSWriterBuilder setKV(String key, String value) {
52 | configuration.set(key, value);
53 | return this;
54 | }
55 |
56 | /**
57 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
58 | *
59 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
60 | *
61 | * @param pathString 设置文件输出路径 set file path
62 | * @return 链
63 | */
64 | @Priority("3")
65 | public HDFSWriterBuilder setPathString(String pathString) {
66 | this.pathString = pathString;
67 | return this;
68 | }
69 |
70 | /**
71 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
72 | *
73 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
74 | *
75 | * @param pathObject 设置文件路径对象 set file path
76 | * @return 链
77 | */
78 | @Priority("2")
79 | public HDFSWriterBuilder setPathObject(Path pathObject) {
80 | this.pathObject = pathObject;
81 | return this;
82 | }
83 |
84 | /**
85 | * 直接通过FileSystem对象构建输出组件,这个是非常推荐的方法
86 | *
87 | * Build the output component directly from the File System object, this is a very recommended method
88 | *
89 | * @param fileSystem HDFS file System
90 | * @return 链
91 | */
92 | @Priority("1")
93 | public HDFSWriterBuilder setFileSystem(FileSystem fileSystem) {
94 | this.fileSystem = fileSystem;
95 | return this;
96 | }
97 |
98 | @Override
99 | public HDFSWriter create() {
100 | try {
101 | if (pathObject == null) pathObject = new Path(pathString);
102 | return new HDFSWriter(fileSystem == null ? FileSystem.get(configuration) : fileSystem, pathObject);
103 | } catch (IOException e) {
104 | e.printStackTrace();
105 | return null;
106 | }
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterGZIP.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.fs.FSDataOutputStream;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.dataOp.dataTearRW.Writer;
7 |
8 | import java.io.ByteArrayOutputStream;
9 | import java.io.IOException;
10 | import java.nio.charset.Charset;
11 | import java.util.zip.GZIPOutputStream;
12 |
13 | /**
14 | * @author 赵凌宇
15 | * HDFS中构建GZIP_DT目录的数据输出组件 Data output component for building the GZIP_DT directory in HDFS
16 | */
17 | public class HDFSWriterGZIP extends Writer {
18 |
19 | protected final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
20 | protected final ByteArrayOutputStream OKYA = new ByteArrayOutputStream();
21 | protected FSDataOutputStream fsDataOutputStream;
22 | Path Out_path;
23 | String Out_PathStr;
24 | private Charset charset;
25 |
26 | /**
27 | * 由建造者进行构造函数的执行,获取到这个类
28 | *
29 | * The constructor executes the constructor to obtain this class
30 | *
31 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
32 | *
33 | * HDFS object, through which the HDFS cluster address will be obtained
34 | * @param path 数据输出路径 data output path
35 | * @param charset 数据输出编码集 Data output code set
36 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
37 | *
38 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
39 | */
40 | public HDFSWriterGZIP(FileSystem fileSystem, Path path, Charset charset) throws IOException {
41 | this.fsDataOutputStream = fileSystem.create(path);
42 | Out_path = path;
43 | Out_PathStr = Out_path.toString();
44 | this.charset = charset;
45 | }
46 |
47 | /**
48 | * 使用默认字符集构造函数 Use default charset constructor
49 | *
50 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
51 | *
52 | * HDFS object, through which the HDFS cluster address will be obtained
53 | * @param path 数据输出路径 data output path
54 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
55 | *
56 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
57 | */
58 | public HDFSWriterGZIP(FileSystem fileSystem, Path path) throws IOException {
59 | this.fsDataOutputStream = fileSystem.create(path);
60 | Out_path = path;
61 | Out_PathStr = Out_path.toString();
62 | }
63 |
64 |
65 | public static HDFSWriterGZIPBuilder builder() {
66 | return new HDFSWriterGZIPBuilder();
67 | }
68 |
69 | @Override
70 | public void write(byte[] b) throws IOException {
71 | byteArrayOutputStream.write(b);
72 | }
73 |
74 | @Override
75 | public void write(byte[] b, int off, int len) throws IOException {
76 | byteArrayOutputStream.write(b, off, len);
77 | }
78 |
79 | @Override
80 | public void flush() throws IOException {
81 | byteArrayOutputStream.flush();
82 | GZIPOutputStream gzipOutputStream = new GZIPOutputStream(OKYA);
83 | gzipOutputStream.write(byteArrayOutputStream.toByteArray());
84 | gzipOutputStream.flush();
85 | gzipOutputStream.close();
86 | fsDataOutputStream.write(OKYA.toByteArray());
87 | }
88 |
89 | @Override
90 | public void close() throws IOException {
91 | OKYA.flush();
92 | OKYA.close();
93 | byteArrayOutputStream.close();
94 | fsDataOutputStream.flush();
95 | fsDataOutputStream.close();
96 | }
97 |
98 | @Override
99 | public String getPath() {
100 | return this.Out_PathStr;
101 | }
102 |
103 | @Override
104 | public void write(int b) throws IOException {
105 | byteArrayOutputStream.write(b);
106 | }
107 |
108 | @Override
109 | public Writer toToObject() {
110 | return this;
111 | }
112 | }
113 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterGZIPBuilder.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.atzhaoPublic.Builder;
7 | import zhao.io.dataTear.atzhaoPublic.Priority;
8 |
9 | import java.io.IOException;
10 |
11 | /**
12 | * HDFS数据输出组件建造者 HDFS data output component builder
13 | */
14 | public class HDFSWriterGZIPBuilder implements Builder<HDFSWriterGZIP> {
15 |
16 | private final Configuration configuration = new Configuration();
17 | private FileSystem fileSystem;
18 | private Path pathObject;
19 | private String pathString;
20 |
21 | /**
22 | * 定位HDFS信息
23 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法,它会通过IP和端口寻找HDFS集群
24 | * Locate HDFS information
25 | * If FileSystem has not been set, this setting will take effect, that is to say, it is an alternate method, it will find the HDFS cluster by IP and port
26 | *
27 | * @param IP HDFS集群通讯地址 一般是主NameNode信息
28 | *
29 | * cluster communication address is generally the main NameNode information
30 | * @param port 通讯端口
31 | * communication port
32 | * @return 链
33 | */
34 | @Priority("2")
35 | public HDFSWriterGZIPBuilder setIP_port(String IP, String port) {
36 | configuration.set("fs.default.name", "hdfs://" + IP + ":" + port);
37 | return this;
38 | }
39 |
40 | /**
41 | * 定制更多配置信息
42 | * 如果没有进行过过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法
43 | *
44 | * Customize more configuration information If no FileSystem settings have been made, this setting will take effect, which means it is an alternate method
45 | *
46 | * @param key HDFS配置名称
47 | * @param value 配置参数
48 | * @return 链
49 | */
50 | @Priority("2")
51 | public HDFSWriterGZIPBuilder setKV(String key, String value) {
52 | configuration.set(key, value);
53 | return this;
54 | }
55 |
56 | /**
57 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
58 | *
59 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
60 | *
61 | * @param pathString 设置文件输出路径 set file path
62 | * @return 链
63 | */
64 | @Priority("3")
65 | public HDFSWriterGZIPBuilder setPathString(String pathString) {
66 | this.pathString = pathString;
67 | return this;
68 | }
69 |
70 | /**
71 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
72 | *
73 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
74 | *
75 | * @param pathObject 设置文件路径对象 set file path
76 | * @return 链
77 | */
78 | @Priority("2")
79 | public HDFSWriterGZIPBuilder setPathObject(Path pathObject) {
80 | this.pathObject = pathObject;
81 | return this;
82 | }
83 |
84 | /**
85 | * 直接通过FileSystem对象构建输出组件,这个是非常推荐的方法
86 | *
87 | * Build the output component directly from the File System object, this is a very recommended method
88 | *
89 | * @param fileSystem HDFS file System
90 | * @return 链
91 | */
92 | @Priority("1")
93 | public HDFSWriterGZIPBuilder setFileSystem(FileSystem fileSystem) {
94 | this.fileSystem = fileSystem;
95 | return this;
96 | }
97 |
98 | @Override
99 | public HDFSWriterGZIP create() {
100 | try {
101 | if (pathObject == null) pathObject = new Path(pathString);
102 | return new HDFSWriterGZIP(fileSystem == null ? FileSystem.get(configuration) : fileSystem, pathObject);
103 | } catch (IOException e) {
104 | e.printStackTrace();
105 | return null;
106 | }
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterSnappy.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.fs.FSDataOutputStream;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import org.xerial.snappy.SnappyOutputStream;
7 | import zhao.io.dataTear.dataOp.dataTearRW.Writer;
8 |
9 | import java.io.ByteArrayOutputStream;
10 | import java.io.IOException;
11 | import java.nio.charset.Charset;
12 |
13 | /**
14 | * @author 赵凌宇
15 | * HDFS中构建Snappy_DT目录的数据输出组件 Data output component for building the Snappy_DT directory in HDFS
16 | */
17 | public class HDFSWriterSnappy extends Writer {
18 |
19 | protected final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
20 | protected final ByteArrayOutputStream OKYA = new ByteArrayOutputStream();
21 | protected FSDataOutputStream fsDataOutputStream;
22 | Path Out_path;
23 | String Out_PathStr;
24 | private Charset charset;
25 |
26 | /**
27 | * 由建造者进行构造函数的执行,获取到这个类
28 | *
29 | * The constructor executes the constructor to obtain this class
30 | *
31 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
32 | *
33 | * HDFS object, through which the HDFS cluster address will be obtained
34 | * @param path 数据输出路径 data output path
35 | * @param charset 数据输出编码集 Data output code set
36 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
37 | *
38 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
39 | */
40 | public HDFSWriterSnappy(FileSystem fileSystem, Path path, Charset charset) throws IOException {
41 | this.fsDataOutputStream = fileSystem.create(path);
42 | Out_path = path;
43 | Out_PathStr = Out_path.toString();
44 | this.charset = charset;
45 | }
46 |
47 | /**
48 | * 使用默认字符集构造函数 Use default charset constructor
49 | *
50 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
51 | *
52 | * HDFS object, through which the HDFS cluster address will be obtained
53 | * @param path 数据输出路径 data output path
54 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
55 | *
56 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
57 | */
58 | public HDFSWriterSnappy(FileSystem fileSystem, Path path) throws IOException {
59 | this.fsDataOutputStream = fileSystem.create(path);
60 | Out_path = path;
61 | Out_PathStr = Out_path.toString();
62 | }
63 |
64 |
65 | public static HDFSWriterSnappyBuilder builder() {
66 | return new HDFSWriterSnappyBuilder();
67 | }
68 |
69 | @Override
70 | public void write(byte[] b) throws IOException {
71 | byteArrayOutputStream.write(b);
72 | }
73 |
74 | @Override
75 | public void write(byte[] b, int off, int len) throws IOException {
76 | byteArrayOutputStream.write(b, off, len);
77 | }
78 |
79 | @Override
80 | public void flush() throws IOException {
81 | byteArrayOutputStream.flush();
82 | SnappyOutputStream snappyOutputStream = new SnappyOutputStream(OKYA);
83 | snappyOutputStream.write(byteArrayOutputStream.toByteArray());
84 | snappyOutputStream.flush();
85 | snappyOutputStream.close();
86 | fsDataOutputStream.write(OKYA.toByteArray());
87 | }
88 |
89 | @Override
90 | public void close() throws IOException {
91 | OKYA.flush();
92 | OKYA.close();
93 | byteArrayOutputStream.close();
94 | fsDataOutputStream.flush();
95 | fsDataOutputStream.close();
96 | }
97 |
98 | @Override
99 | public String getPath() {
100 | return this.Out_PathStr;
101 | }
102 |
103 | @Override
104 | public void write(int b) throws IOException {
105 | byteArrayOutputStream.write(b);
106 | }
107 |
108 | @Override
109 | public Writer toToObject() {
110 | return this;
111 | }
112 | }
113 |
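The Snappy writer above buffers all written bytes in memory and only compresses them and pushes them to HDFS inside flush(), so flush() should be called exactly once, before close(). A minimal lifecycle sketch, assuming a configured FileSystem; the path is a hypothetical example:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterSnappy;

public class SnappyWriterDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fileSystem = FileSystem.get(new Configuration());
        HDFSWriterSnappy writer = new HDFSWriterSnappy(fileSystem, new Path("/data/demo_dt.snappy"));
        writer.write("line1\n".getBytes());
        writer.write("line2\n".getBytes());
        writer.flush();  // Snappy-compresses the whole buffer and writes it to HDFS in one shot
        writer.close();  // closes the in-memory buffers and the HDFS output stream
    }
}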
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterSnappyBuilder.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.atzhaoPublic.Builder;
7 | import zhao.io.dataTear.atzhaoPublic.Priority;
8 |
9 | import java.io.IOException;
10 |
11 | /**
12 | * HDFS数据输出组件建造者 HDFS data output component builder
13 | */
14 | public class HDFSWriterSnappyBuilder implements Builder<HDFSWriterSnappy> {
15 |
16 | protected final Configuration configuration = new Configuration();
17 | protected String pathString;
18 | protected Path pathObject;
19 | protected FileSystem fileSystem;
20 |
21 | /**
22 | * 定位HDFS信息 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法,它将会通过IP与端口找到HDFS集群
23 | *
24 | * Locate HDFS information
25 | * If FileSystem has not been set, this setting will take effect, that is to say, it is an alternate method, it will find the HDFS cluster by IP and port
26 | *
27 | * @param IP HDFS集群通讯地址 一般是主NameNode信息
28 | *
29 | * cluster communication address is generally the main NameNode information
30 | * @param port 通讯端口
31 | * communication port
32 | * @return this builder, for chained calls
33 | */
34 | @Priority("2")
35 | public HDFSWriterSnappyBuilder setIP_port(String IP, String port) {
36 | configuration.set("fs.default.name", "hdfs://" + IP + ":" + port);
37 | return this;
38 | }
39 |
40 | /**
41 | * 定制更多配置信息
42 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法
43 | *
44 | * Customize more configuration information If no FileSystem settings have been made, this setting will take effect, which means it is an alternate method
45 | *
46 | * @param key HDFS configuration name
47 | * @param value configuration value
48 | * @return this builder, for chained calls
49 | */
50 | @Priority("2")
51 | public HDFSWriterSnappyBuilder setKV(String key, String value) {
52 | configuration.set(key, value);
53 | return this;
54 | }
55 |
56 | /**
57 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
58 | *
59 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
60 | *
61 | * @param pathString 设置文件输出路径 set file path
62 | * @return this builder, for chained calls
63 | */
64 | @Priority("3")
65 | public HDFSWriterSnappyBuilder setPathString(String pathString) {
66 | this.pathString = pathString;
67 | return this;
68 | }
69 |
70 | /**
71 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
72 | *
73 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
74 | *
75 | * @param pathObject 设置文件路径对象 set file path
76 | * @return this builder, for chained calls
77 | */
78 | @Priority("2")
79 | public HDFSWriterSnappyBuilder setPathObject(Path pathObject) {
80 | this.pathObject = pathObject;
81 | return this;
82 | }
83 |
84 | /**
85 | * 直接通过FileSystem对象构建输出组件,这个是非常推荐的方法
86 | *
87 | * Build the output component directly from the FileSystem object; this is the recommended approach
88 | *
89 | * @param fileSystem HDFS file System
90 | * @return this builder, for chained calls
91 | */
92 | @Priority("1")
93 | public HDFSWriterSnappyBuilder setFileSystem(FileSystem fileSystem) {
94 | this.fileSystem = fileSystem;
95 | return this;
96 | }
97 |
98 | @Override
99 | public HDFSWriterSnappy create() {
100 | try {
101 | if (pathObject == null) pathObject = new Path(pathString);
102 | return new HDFSWriterSnappy(fileSystem, pathObject);
103 | } catch (IOException e) {
104 | e.printStackTrace();
105 | return null;
106 | }
107 | }
108 | }
109 |
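A sketch of how the @Priority annotations above are meant to be read: the FileSystem route (Priority 1) takes precedence, while setKV and setIP_port only matter on the Configuration-based fallback path. The key, value, and path below are hypothetical examples:

import org.apache.hadoop.fs.FileSystem;
import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterSnappy;

public class SnappyBuilderDemo {
    static HDFSWriterSnappy build(FileSystem fileSystem) {
        return HDFSWriterSnappy.builder()
                .setFileSystem(fileSystem)                // Priority 1: the recommended route
                .setKV("dfs.replication", "2")            // Priority 2: extra settings for the fallback path
                .setPathString("/data/demo_dt.snappy")    // Priority 3: output location
                .create();                                // may return null if an IOException occurred
    }
}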
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterZIP.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.fs.FSDataOutputStream;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.dataOp.dataTearRW.Writer;
7 |
8 | import java.io.ByteArrayOutputStream;
9 | import java.io.IOException;
10 | import java.util.zip.ZipEntry;
11 | import java.util.zip.ZipOutputStream;
12 |
13 | /**
14 | * @author 赵凌宇
15 | * Data output component that builds a ZIP-algorithm DT directory in HDFS
16 | */
17 | public class HDFSWriterZIP extends Writer {
18 | protected final ByteArrayOutputStream noya = new ByteArrayOutputStream();
19 | protected final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
20 | protected FSDataOutputStream fsDataOutputStream;
21 | Path Out_path;
22 | String Out_PathStr;
23 |
24 | /**
25 | * 由建造者进行构造函数的执行,获取到这个类
26 | *
27 | * The builder runs this constructor to obtain an instance of this class
28 | *
29 | * @param fileSystem HDFS对象,会通过该对象获取HDFS集群地址
30 | *
31 | * HDFS object, through which the HDFS cluster address will be obtained
32 | * @param path 数据输出路径 data output path
33 | * @param charset 数据输出编码集 Data output code set
34 | * @throws IOException 无法通过HDFS对象获取目标HDFS 或 Path错误的时候,都有可能抛出该错误
35 | *
36 | * This error may be thrown when the target HDFS or Path error cannot be obtained through the HDFS object
37 | */
38 | public HDFSWriterZIP(FileSystem fileSystem, Path path, String charset) throws IOException {
39 | this.fsDataOutputStream = fileSystem.create(path);
40 | Out_path = path;
41 | Out_PathStr = Out_path.toString();
42 | }
43 |
44 | /**
45 | * @return HDFS的DataTear文件输出组件的建造者对象
46 | *
47 | * Builder object for HDFS's DataTear file output component
48 | */
49 | public static HDFSWriterZIPBuilder builder() {
50 | return new HDFSWriterZIPBuilder();
51 | }
52 |
53 | @Override
54 | public void write(byte[] b, int off, int len) throws IOException {
55 | noya.write(b, off, len);
56 | }
57 |
58 | @Override
59 | public void flush() throws IOException {
60 | noya.flush();
61 | ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream);
62 | ZipEntry zipEntry = new ZipEntry("DT-ZHAO");
63 | zipOutputStream.putNextEntry(zipEntry);
64 | zipOutputStream.write(noya.toByteArray());
65 | zipOutputStream.flush();
66 | zipOutputStream.closeEntry();
67 | zipOutputStream.close();
68 | fsDataOutputStream.write(byteArrayOutputStream.toByteArray());
69 | byteArrayOutputStream.flush();
70 | fsDataOutputStream.flush();
71 | }
72 |
73 | @Override
74 | public void close() throws IOException {
75 | noya.close();
76 | byteArrayOutputStream.close();
77 | fsDataOutputStream.close();
78 | }
79 |
80 | @Override
81 | public void write(int b) throws IOException {
82 | noya.write(b);
83 | }
84 |
85 | @Override
86 | public String getPath() {
87 | return Out_PathStr;
88 | }
89 |
90 | @Override
91 | public Writer toToObject() {
92 | return this;
93 | }
94 |
95 | @Override
96 | public void write(byte[] b) throws IOException {
97 | noya.write(b);
98 | }
99 | }
100 |
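The ZIP writer above wraps the entire buffered payload into a single archive entry named "DT-ZHAO" during flush() (note that the charset argument is accepted but not currently stored by the constructor). A lifecycle sketch under the same assumptions as the earlier examples, with a hypothetical output path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterZIP;

public class ZIPWriterDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fileSystem = FileSystem.get(new Configuration());
        HDFSWriterZIP writer = HDFSWriterZIP.builder()
                .setFileSystem(fileSystem)
                .setPathString("/data/demo_dt.zip")  // hypothetical output path
                .create();
        if (writer != null) {                        // create() returns null on IOException
            writer.write("hello DataTear".getBytes());
            writer.flush();  // builds the single "DT-ZHAO" ZipEntry and writes it to HDFS
            writer.close();
        }
    }
}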
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearRW/hdfs/HDFSWriterZIPBuilder.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearRW.hdfs;
2 |
3 | import org.apache.hadoop.conf.Configuration;
4 | import org.apache.hadoop.fs.FileSystem;
5 | import org.apache.hadoop.fs.Path;
6 | import zhao.io.dataTear.atzhaoPublic.Builder;
7 | import zhao.io.dataTear.atzhaoPublic.Priority;
8 |
9 | import java.io.IOException;
10 |
11 | /**
12 | * HDFS数据输出组件建造者 HDFS data output component builder
13 | */
14 | public class HDFSWriterZIPBuilder implements Builder<HDFSWriterZIP> {
15 |
16 | protected final Configuration configuration = new Configuration();
17 | protected String pathString;
18 | protected Path pathObject;
19 | protected FileSystem fileSystem;
20 | protected String charset;
21 |
22 | /**
23 | * @param charset 准备使用的字符编码集类 可以通过 "StandardCharsets.XXX" 枚举类获取
24 | * The character encoding set class to be used can be obtained through the "StandardCharsets.XXX" enumeration class
25 | * @return this builder, for chained calls
26 | */
27 | public HDFSWriterZIPBuilder setCharset(String charset) {
28 | this.charset = charset;
29 | return this;
30 | }
31 |
32 | /**
33 | * 定位HDFS信息
34 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法,它将会通过IP与端口找到HDFS集群
35 | *
36 | * Locate HDFS information
37 | * If FileSystem has not been set, this setting will take effect, that is to say, it is an alternate method, it will find the HDFS cluster by IP and port
38 | *
39 | * @param IP HDFS集群通讯地址 一般是主NameNode信息
40 | *
41 | * cluster communication address is generally the main NameNode information
42 | * @param port 通讯端口
43 | * communication port
44 | * @return this builder, for chained calls
45 | */
46 | @Priority("2")
47 | public HDFSWriterZIPBuilder setIP_port(String IP, String port) {
48 | configuration.set("fs.default.name", "hdfs://" + IP + ":" + port);
49 | return this;
50 | }
51 |
52 | /**
53 | * 定制更多配置信息
54 | * 如果没有进行过FileSystem的设置,本设置将会生效,也就是说 它是一种备用方法
55 | *
56 | * Customize more configuration information If no FileSystem settings have been made, this setting will take effect, which means it is an alternate method
57 | *
58 | * @param key HDFS configuration name
59 | * @param value configuration value
60 | * @return this builder, for chained calls
61 | */
62 | @Priority("2")
63 | public HDFSWriterZIPBuilder setKV(String key, String value) {
64 | configuration.set(key, value);
65 | return this;
66 | }
67 |
68 | /**
69 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
70 | *
71 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
72 | *
73 | * @param pathString 设置文件输出路径 set file path
74 | * @return this builder, for chained calls
75 | */
76 | @Priority("3")
77 | public HDFSWriterZIPBuilder setPathString(String pathString) {
78 | this.pathString = pathString;
79 | return this;
80 | }
81 |
82 | /**
83 | * 定位输出路径 该方法不一定会被调用,因为针对文件输出路径的设置由DataTear去实现
84 | *
85 | * Locate the output path This method is not necessarily called, because the settings for the file output path are implemented by DataTear
86 | *
87 | * @param pathObject 设置文件路径对象 set file path
88 | * @return this builder, for chained calls
89 | */
90 | @Priority("2")
91 | public HDFSWriterZIPBuilder setPathObject(Path pathObject) {
92 | this.pathObject = pathObject;
93 | return this;
94 | }
95 |
96 | /**
97 | * 直接通过FileSystem对象构建输出组件,这个是非常推荐的方法
98 | *
99 | * Build the output component directly from the FileSystem object; this is the recommended approach
100 | *
101 | * @param fileSystem HDFS file System
102 | * @return this builder, for chained calls
103 | */
104 | @Priority("1")
105 | public HDFSWriterZIPBuilder setFileSystem(FileSystem fileSystem) {
106 | this.fileSystem = fileSystem;
107 | return this;
108 | }
109 |
110 | @Override
111 | public HDFSWriterZIP create() {
112 | try {
113 | if (pathObject == null) pathObject = new Path(pathString);
114 | return new HDFSWriterZIP(fileSystem, pathObject, charset);
115 | } catch (IOException e) {
116 | e.printStackTrace();
117 | return null;
118 | }
119 | }
120 | }
121 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/DTBulider.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams;
2 |
3 | import zhao.io.dataTear.atzhaoPublic.Builder;
4 |
5 | import java.io.BufferedOutputStream;
6 | import java.io.FileNotFoundException;
7 | import java.io.FileOutputStream;
8 |
9 | public class DTBulider implements Builder<BufferedOutputStream> {
10 |
11 | protected String pathString;
12 |
13 | public DTBulider setPathString(String pathString) {
14 | this.pathString = pathString;
15 | return this;
16 | }
17 |
18 | @Override
19 | public BufferedOutputStream create() {
20 | try {
21 | return new BufferedOutputStream(new FileOutputStream(pathString));
22 | } catch (FileNotFoundException e) {
23 | e.printStackTrace();
24 | return null;
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/DT_builtIn_UDF.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams;
2 |
3 | import zhao.io.dataTear.dataOp.dataTearStreams.hdfsStream.*;
4 | import zhao.io.dataTear.dataOp.dataTearStreams.localStream.*;
5 |
6 | /**
7 | * Built-in UDF data stream enumeration
8 | *
9 | * Users can simply think of it as a data operation mode, which makes it more natural to call
10 | *
11 | * As a UDF data stream function parameter, it is obtained by the algorithm library, which then provides the corresponding data algorithm stream
12 | */
13 | public enum DT_builtIn_UDF {
14 | /**
15 | * Builds a Text DT database through the SQL-capable DataBase algorithm stream. Note that a cast is required
16 | *
17 | * @see zhao.io.dataTear.dataOp.dataTearStreams.dbStream.DataBaseStream
18 | */
19 | SQLDB_TEXT,
20 | /**
21 | * Builds a Text DT directory in HDFS. Note that a cast is required
22 | *
23 | * @see HDFSTextStream
24 | */
25 | HDFS_TEXT,
26 | /**
27 | * Builds a ZIP DT directory in HDFS. Note that a cast is required
28 | *
29 | * @see HDFSZIPStream
30 | */
31 | HDFS_ZIP,
32 | /**
33 | * Builds a GZIP DT directory in HDFS. Note that a cast is required
34 | *
35 | * @see HDFSGZIPStream
36 | */
37 | HDFS_GZIP,
38 | /**
39 | * Builds a BZIP2 DT directory in HDFS. Note that a cast is required
40 | *
41 | * @see HDFSBZIP2Stream
42 | */
43 | HDFS_BZIP2,
44 | /**
45 | * Builds a Snappy DT directory in HDFS. Note that a cast is required
46 | *
47 | * @see HDFSSnappyStream
48 | */
49 | HDFS_SNAPPY,
50 | /**
51 | * Builds a Text DT directory in the local file system
52 | *
53 | * @see LocalBufferStream
54 | */
55 | LOCAL_TEXT,
56 | /**
57 | * Builds a ZIP-processed DT directory in the local file system
58 | *
59 | * @see LocalZIPStream
60 | */
61 | LOCAL_ZIP,
62 | /**
63 | * Builds a GZIP-processed DT directory in the local file system
64 | *
65 | * @see LocalGZIPStream
66 | */
67 | LOCAL_GZIP,
68 | /**
69 | * Builds a BZIP2-processed DT directory in the local file system
70 | *
71 | * @see LocalBZIP2Stream
72 | */
73 | LOCAL_BZIP2,
74 | /**
75 | * Builds a Snappy-processed DT directory in the local file system
76 | *
77 | * @see LocalSnappyStream
78 | */
79 | LOCAL_SNAPPY
80 | }
81 |
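A sketch of how these constants can be resolved to the stream implementations named in the @see tags; this mapping is illustrative only and is not necessarily how the algorithm library dispatches internally:

import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
import zhao.io.dataTear.dataOp.dataTearStreams.DT_builtIn_UDF;
import zhao.io.dataTear.dataOp.dataTearStreams.localStream.*;

public class UDFMappingDemo {
    // illustrative resolver for the local-filesystem constants
    static DT_StreamBase toLocalStream(DT_builtIn_UDF udf) {
        switch (udf) {
            case LOCAL_TEXT:   return new LocalBufferStream();
            case LOCAL_ZIP:    return new LocalZIPStream();
            case LOCAL_GZIP:   return new LocalGZIPStream();
            case LOCAL_BZIP2:  return new LocalBZIP2Stream();
            case LOCAL_SNAPPY: return new LocalSnappyStream();
            default:
                // the HDFS constants additionally need a FileSystem set on the concrete class
                throw new UnsupportedOperationException("no local mapping for " + udf);
        }
    }
}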
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/hdfsStream/HDFSBZIP2Stream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.hdfsStream;
2 |
3 | import org.apache.hadoop.fs.FileSystem;
4 | import org.apache.hadoop.fs.Path;
5 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
6 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSReaderBZIP2;
7 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterBZIP2;
8 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
9 | import zhao.io.ex.ZHAOLackOfInformation;
10 |
11 | import java.io.IOException;
12 | import java.io.OutputStream;
13 |
14 | /**
15 | * Outputs a BZIP2 DT directory to HDFS
16 | *
17 | * @author zhao
18 | */
19 | public class HDFSBZIP2Stream implements DT_StreamBase {
20 | FileSystem fileSystem;
21 |
22 | /**
23 | * @param fileSystem Connect through a FileSystem; note that a cast to this class is required, because this method is specific to this class and is not included in the interface
24 | * @return this object, for chained calls
25 | */
26 | public HDFSBZIP2Stream setFileSystem(FileSystem fileSystem) {
27 | this.fileSystem = fileSystem;
28 | return this;
29 | }
30 |
31 | @Override
32 | public Reader readStream(String inPath) throws IOException {
33 | try {
34 | if (fileSystem == null) {
35 | throw new NullPointerException();
36 | }
37 | Path path = new Path(inPath);
38 | return HDFSReaderBZIP2
39 | .builder()
40 | .setFileSystem(fileSystem)
41 | .setPathObject(path)
42 | .setPathString(inPath)
43 | .create()
44 | .setInputStream(fileSystem.open(path));
45 | } catch (NullPointerException e) {
46 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSBZIP2Stream,设置FileSystem然后获取readStream设置输入路径。";
47 | logger1.error(elog);
48 | throw new ZHAOLackOfInformation(elog);
49 | }
50 | }
51 |
52 | @Override
53 | public OutputStream writeStream(String outPath) throws IOException {
54 | try {
55 | if (fileSystem == null) {
56 | throw new ZHAOLackOfInformation("");
57 | }
58 | return HDFSWriterBZIP2
59 | .builder()
60 | .setFileSystem(fileSystem)
61 | .setPathObject(new Path(outPath))
62 | .create();
63 | } catch (ZHAOLackOfInformation e) {
64 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSBZIP2Stream,设置FileSystem然后获取WriteStream设置输入路径。";
65 | logger1.error(elog);
66 | throw new ZHAOLackOfInformation(elog);
67 | }
68 | }
69 | }
70 |
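A minimal read/write sketch for the stream above, assuming a reachable HDFS and that the BZIP2 writer follows the same flush-then-close lifecycle as the Snappy and ZIP writers shown earlier; paths are hypothetical. Note that setFileSystem lives on the concrete class, which is why the variable is typed HDFSBZIP2Stream rather than DT_StreamBase:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import zhao.io.dataTear.dataOp.dataTearRW.Reader;
import zhao.io.dataTear.dataOp.dataTearStreams.hdfsStream.HDFSBZIP2Stream;

import java.io.OutputStream;

public class BZIP2StreamDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fileSystem = FileSystem.get(new Configuration());
        HDFSBZIP2Stream stream = new HDFSBZIP2Stream().setFileSystem(fileSystem);
        OutputStream out = stream.writeStream("/data/demo_dt.bz2");
        out.write("hello DataTear".getBytes());
        out.flush();
        out.close();
        Reader reader = stream.readStream("/data/demo_dt.bz2");  // decompressing reader for the same path
    }
}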
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/hdfsStream/HDFSGZIPStream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.hdfsStream;
2 |
3 | import org.apache.hadoop.fs.FileSystem;
4 | import org.apache.hadoop.fs.Path;
5 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
6 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSReaderGZIP;
7 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterGZIP;
8 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
9 | import zhao.io.ex.ZHAOLackOfInformation;
10 |
11 | import java.io.IOException;
12 | import java.io.OutputStream;
13 |
14 | /**
15 | * Outputs a GZIP DT directory to HDFS
16 | *
17 | * @author zhao
18 | */
19 | public class HDFSGZIPStream implements DT_StreamBase {
20 | FileSystem fileSystem;
21 |
22 | /**
23 | * @param fileSystem Connect through a FileSystem; note that a cast to this class is required, because this method is specific to this class and is not included in the interface
24 | * @return this object, for chained calls
25 | */
26 | public HDFSGZIPStream setFileSystem(FileSystem fileSystem) {
27 | this.fileSystem = fileSystem;
28 | return this;
29 | }
30 |
31 | @Override
32 | public Reader readStream(String inPath) throws IOException {
33 | try {
34 | if (fileSystem == null) {
35 | throw new NullPointerException();
36 | }
37 | Path path = new Path(inPath);
38 | return HDFSReaderGZIP
39 | .builder()
40 | .setFileSystem(fileSystem)
41 | .setPathObject(path)
42 | .setPathString(inPath)
43 | .create()
44 | .setInputStream(fileSystem.open(path));
45 | } catch (NullPointerException e) {
46 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSGZIPStream,设置FileSystem然后获取readStream设置输入路径。";
47 | logger1.error(elog);
48 | throw new ZHAOLackOfInformation(elog);
49 | }
50 | }
51 |
52 | @Override
53 | public OutputStream writeStream(String outPath) throws IOException {
54 | try {
55 | if (fileSystem == null) {
56 | throw new ZHAOLackOfInformation("");
57 | }
58 | return HDFSWriterGZIP
59 | .builder()
60 | .setFileSystem(fileSystem)
61 | .setPathObject(new Path(outPath))
62 | .create();
63 | } catch (ZHAOLackOfInformation e) {
64 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSGZIPStream,设置FileSystem然后获取WriteStream设置输入路径。";
65 | logger1.error(elog);
66 | throw new ZHAOLackOfInformation(elog);
67 | }
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/hdfsStream/HDFSSnappyStream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.hdfsStream;
2 |
3 | import org.apache.hadoop.fs.FileSystem;
4 | import org.apache.hadoop.fs.Path;
5 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
6 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSReaderSnappy;
7 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterSnappy;
8 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
9 | import zhao.io.ex.ZHAOLackOfInformation;
10 |
11 | import java.io.IOException;
12 | import java.io.OutputStream;
13 |
14 | /**
15 | * Outputs a Snappy DT directory to HDFS
16 | *
17 | * @author zhao
18 | */
19 | public class HDFSSnappyStream implements DT_StreamBase {
20 | FileSystem fileSystem;
21 |
22 | /**
23 | * @param fileSystem Connect through a FileSystem; note that a cast to this class is required, because this method is specific to this class and is not included in the interface
24 | * @return this object, for chained calls
25 | */
26 | public HDFSSnappyStream setFileSystem(FileSystem fileSystem) {
27 | this.fileSystem = fileSystem;
28 | return this;
29 | }
30 |
31 | @Override
32 | public Reader readStream(String inPath) throws IOException {
33 | try {
34 | if (fileSystem == null) {
35 | throw new NullPointerException();
36 | }
37 | Path path = new Path(inPath);
38 | return HDFSReaderSnappy
39 | .builder()
40 | .setFileSystem(fileSystem)
41 | .setPathObject(path)
42 | .setPathString(inPath)
43 | .create()
44 | .setInputStream(fileSystem.open(path));
45 | } catch (NullPointerException e) {
46 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSSnappyStream,设置FileSystem然后获取readStream设置输入路径。";
47 | logger1.error(elog);
48 | throw new ZHAOLackOfInformation(elog);
49 | }
50 | }
51 |
52 | @Override
53 | public OutputStream writeStream(String outPath) throws IOException {
54 | try {
55 | if (fileSystem == null) {
56 | throw new ZHAOLackOfInformation("");
57 | }
58 | return HDFSWriterSnappy
59 | .builder()
60 | .setFileSystem(fileSystem)
61 | .setPathObject(new Path(outPath))
62 | .create();
63 | } catch (ZHAOLackOfInformation e) {
64 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSSnappyStream,设置FileSystem然后获取WriteStream设置输入路径。";
65 | logger1.error(elog);
66 | throw new ZHAOLackOfInformation(elog);
67 | }
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/hdfsStream/HDFSTextStream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.hdfsStream;
2 |
3 | import org.apache.hadoop.fs.FileSystem;
4 | import org.apache.hadoop.fs.Path;
5 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
6 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSReader;
7 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriter;
8 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
9 |
10 | import java.io.IOException;
11 | import java.io.OutputStream;
12 |
13 | /**
14 | * Outputs a Text DT directory to HDFS
15 | *
16 | * @author zhao
17 | */
18 | public class HDFSTextStream implements DT_StreamBase {
19 | FileSystem fileSystem;
20 |
21 | /**
22 | * @param fileSystem Connect through a FileSystem; note that a cast to this class is required, because this method is specific to this class and is not included in the interface
23 | * @return this object, for chained calls
24 | */
25 | public HDFSTextStream setFileSystem(FileSystem fileSystem) {
26 | this.fileSystem = fileSystem;
27 | return this;
28 | }
29 |
30 | /**
31 | * @param inPath input path
32 | * @return HDFS read data stream
33 | */
34 | @Override
35 | public Reader readStream(String inPath) throws IOException {
36 | try {
37 | Path path = new Path(inPath);
38 | return HDFSReader.builder().setFileSystem(fileSystem).setPathObject(path).create().setInputStream(fileSystem.open(path));
39 | } catch (NullPointerException e) {
40 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSTextStream,设置FileSystem然后获取readStream设置输入路径。";
41 | logger1.error(elog);
42 | throw new IOException(elog);
43 | }
44 | }
45 |
46 | /**
47 | * @param outPath output path
48 | * @return HDFS write data stream
49 | */
50 | @Override
51 | public OutputStream writeStream(String outPath) throws IOException {
52 | try {
53 | return HDFSWriter.builder().setFileSystem(fileSystem).setPathString(outPath).create();
54 | } catch (ArrayIndexOutOfBoundsException e) {
55 | throw new IOException("您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSTextStream,设置FileSystem然后获取writeStream设置输出路径。");
56 | }
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/hdfsStream/HDFSZIPStream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.hdfsStream;
2 |
3 | import org.apache.hadoop.fs.FileSystem;
4 | import org.apache.hadoop.fs.Path;
5 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
6 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSReaderZIP;
7 | import zhao.io.dataTear.dataOp.dataTearRW.hdfs.HDFSWriterZIP;
8 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
9 | import zhao.io.ex.ZHAOLackOfInformation;
10 |
11 | import java.io.IOException;
12 | import java.io.OutputStream;
13 |
14 | /**
15 | * Outputs a ZIP DataTear directory to HDFS
16 | *
17 | * @author zhao
18 | */
19 | public class HDFSZIPStream implements DT_StreamBase {
20 | FileSystem fileSystem;
21 |
22 | /**
23 | * @param fileSystem Connect through a FileSystem; note that a cast to this class is required, because this method is specific to this class and is not included in the interface
24 | * @return this object, for chained calls
25 | */
26 | public HDFSZIPStream setFileSystem(FileSystem fileSystem) {
27 | this.fileSystem = fileSystem;
28 | return this;
29 | }
30 |
31 | @Override
32 | public Reader readStream(String inPath) throws IOException {
33 | try {
34 | if (fileSystem == null) {
35 | throw new NullPointerException();
36 | }
37 | Path path = new Path(inPath);
38 | return HDFSReaderZIP.builder().setFileSystem(fileSystem).setPathObject(path).setPathString(inPath).create().setInputStream(fileSystem.open(path));
39 | } catch (NullPointerException e) {
40 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSZIPStream,设置FileSystem然后获取readStream设置输入路径。";
41 | logger1.error(elog);
42 | throw new ZHAOLackOfInformation(elog);
43 | }
44 | }
45 |
46 | @Override
47 | public OutputStream writeStream(String outPath) throws IOException {
48 | try {
49 | if (fileSystem == null) {
50 | throw new ZHAOLackOfInformation("");
51 | }
52 | return HDFSWriterZIP.builder().setFileSystem(fileSystem).setPathObject(new Path(outPath)).create();
53 | } catch (ZHAOLackOfInformation e) {
54 | String elog = "您好,您使用了 " + this.getClass().getName() + " 但是您传入的参数似乎为空哦!解决方案:转换为HDFSZIPStream,设置FileSystem然后获取WriteStream设置输入路径。";
55 | logger1.error(elog);
56 | throw new ZHAOLackOfInformation(elog);
57 | }
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/localStream/LocalBZIP2Stream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.localStream;
2 |
3 | import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
4 | import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
5 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
6 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
7 |
8 | import java.io.FileInputStream;
9 | import java.io.FileOutputStream;
10 | import java.io.IOException;
11 | import java.io.OutputStream;
12 |
13 | /**
14 | * Built-in custom stream implementation for BZIP2-compressed data
15 | */
16 | public class LocalBZIP2Stream implements DT_StreamBase {
17 |
18 | @Override
19 | public Reader readStream(String inPath) throws IOException {
20 | return new Reader().setInputStream(new BZip2CompressorInputStream(new FileInputStream(inPath)));
21 | }
22 |
23 | @Override
24 | public OutputStream writeStream(String outPath) throws IOException {
25 | return new BZip2CompressorOutputStream(new FileOutputStream(outPath));
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/localStream/LocalBufferStream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.localStream;
2 |
3 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
4 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
5 |
6 | import java.io.*;
7 |
8 | /**
9 | * Reads data stored as text from the local file system
10 | */
11 | public class LocalBufferStream implements DT_StreamBase {
12 |
13 | @Override
14 | public Reader readStream(String inPath) throws IOException {
15 | return new Reader().setInputReaderStream(new BufferedReader(new FileReader(inPath)));
16 | }
17 |
18 | @Override
19 | public OutputStream writeStream(String outPath) throws IOException {
20 | return new BufferedOutputStream(new FileOutputStream(outPath));
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/localStream/LocalGZIPStream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.localStream;
2 |
3 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
4 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
5 |
6 | import java.io.FileInputStream;
7 | import java.io.FileOutputStream;
8 | import java.io.IOException;
9 | import java.io.OutputStream;
10 | import java.util.zip.GZIPInputStream;
11 | import java.util.zip.GZIPOutputStream;
12 |
13 | /**
14 | * Built-in custom stream implementation for GZIP-compressed data
15 | */
16 | public class LocalGZIPStream implements DT_StreamBase {
17 |
18 | @Override
19 | public Reader readStream(String inPath) throws IOException {
20 | return new Reader().setInputStream(new GZIPInputStream(new FileInputStream(inPath)));
21 | }
22 |
23 | @Override
24 | public OutputStream writeStream(String outPath) throws IOException {
25 | return new GZIPOutputStream(new FileOutputStream(outPath));
26 | }
27 | }
28 |
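A round-trip sketch for the local GZIP stream above, verified with the JDK's own GZIPInputStream; the file name is a hypothetical example:

import zhao.io.dataTear.dataOp.dataTearStreams.localStream.LocalGZIPStream;

import java.io.*;
import java.util.zip.GZIPInputStream;

public class LocalGZIPDemo {
    public static void main(String[] args) throws IOException {
        LocalGZIPStream stream = new LocalGZIPStream();
        OutputStream out = stream.writeStream("demo.gz");
        out.write("hello DataTear\n".getBytes());
        out.close();  // GZIPOutputStream writes the compressed trailer on close
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
                new GZIPInputStream(new FileInputStream("demo.gz"))))) {
            System.out.println(in.readLine());  // prints: hello DataTear
        }
    }
}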
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/localStream/LocalSnappyStream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.localStream;
2 |
3 | import org.xerial.snappy.SnappyInputStream;
4 | import org.xerial.snappy.SnappyOutputStream;
5 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
6 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
7 |
8 | import java.io.FileInputStream;
9 | import java.io.FileOutputStream;
10 | import java.io.IOException;
11 | import java.io.OutputStream;
12 |
13 | /**
14 | * Built-in custom stream implementation for Snappy-compressed data
15 | */
16 | public class LocalSnappyStream implements DT_StreamBase {
17 | @Override
18 | public Reader readStream(String inPath) throws IOException {
19 | return new Reader().setInputStream(new SnappyInputStream(new FileInputStream(inPath)));
20 | }
21 |
22 | @Override
23 | public OutputStream writeStream(String outPath) throws IOException {
24 | return new SnappyOutputStream(new FileOutputStream(outPath));
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/dataOp/dataTearStreams/localStream/LocalZIPStream.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.dataOp.dataTearStreams.localStream;
2 |
3 | import zhao.io.dataTear.dataOp.dataTearRW.Reader;
4 | import zhao.io.dataTear.dataOp.dataTearStreams.DT_StreamBase;
5 |
6 | import java.io.FileOutputStream;
7 | import java.io.IOException;
8 | import java.io.OutputStream;
9 | import java.util.Enumeration;
10 | import java.util.zip.ZipEntry;
11 | import java.util.zip.ZipFile;
12 | import java.util.zip.ZipOutputStream;
13 |
14 | /**
15 | * Built-in custom stream implementation for ZIP-compressed data
16 | */
17 | public class LocalZIPStream implements DT_StreamBase {
18 |
19 | @Override
20 | public Reader readStream(String inPath) throws IOException {
21 | ZipFile zipFile = new ZipFile(inPath);
22 | Enumeration<? extends ZipEntry> entries = zipFile.entries();
23 | ZipEntry zipEntry = entries.hasMoreElements() ? entries.nextElement() : null;
24 | if (zipEntry != null) {
25 | return new Reader().setInputStream(zipFile.getInputStream(zipEntry));
26 | } else {
27 | logger1.error("内置的ZIP数据输入组件运行了,但是发生了空指针异常。");
28 | return null;
29 | }
30 | }
31 |
32 | @Override
33 | public OutputStream writeStream(String outPath) throws IOException {
34 | ZipOutputStream zipOutputStream = new ZipOutputStream(new FileOutputStream(outPath));
35 | zipOutputStream.putNextEntry(new ZipEntry("DT-ZHAO"));
36 | return zipOutputStream;
37 | }
38 | }
39 |
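Because writeStream() above already opens a ZipEntry named "DT-ZHAO", callers only write bytes and close, and readStream() later hands back the first entry of the archive. A round-trip sketch with a hypothetical file name:

import zhao.io.dataTear.dataOp.dataTearRW.Reader;
import zhao.io.dataTear.dataOp.dataTearStreams.localStream.LocalZIPStream;

import java.io.IOException;
import java.io.OutputStream;

public class LocalZIPDemo {
    public static void main(String[] args) throws IOException {
        LocalZIPStream stream = new LocalZIPStream();
        OutputStream out = stream.writeStream("demo.zip");  // entry "DT-ZHAO" is already open
        out.write("hello DataTear".getBytes());
        out.close();  // ZipOutputStream closes the entry and writes the central directory
        Reader reader = stream.readStream("demo.zip");      // positioned on the first (only) entry
    }
}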
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/ex/AnalysisMetadataException.java:
--------------------------------------------------------------------------------
1 | package zhao.io.ex;
2 |
3 | /**
4 | * Exception thrown when metadata parsing fails
5 | */
6 | public class AnalysisMetadataException extends DataTearException {
7 | public AnalysisMetadataException() {
8 | super();
9 | }
10 |
11 | public AnalysisMetadataException(String message) {
12 | super(message);
13 | }
14 |
15 | public AnalysisMetadataException(String errorStr, org.slf4j.Logger logger) {
16 | super(errorStr);
17 | super.LoggerToFile(this, logger);
18 | }
19 |
20 | @Override
21 | public String getMessage() {
22 | return super.getMessage() + "NameManager的元数据解析失败!";
23 | }
24 |
25 | @Override
26 | public String getLocalizedMessage() {
27 | return super.getLocalizedMessage() + "NameManager的元数据解析失败!";
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/ex/CommandParsingException.java:
--------------------------------------------------------------------------------
1 | package zhao.io.ex;
2 |
3 | /**
4 | * Command parsing exception
5 | */
6 | public class CommandParsingException extends ArrayIndexOutOfBoundsException {
7 | public CommandParsingException() {
8 | super();
9 | }
10 |
11 | public CommandParsingException(int index) {
12 | super(index);
13 | }
14 |
15 | public CommandParsingException(String s) {
16 | super(s);
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/ex/DataTearException.java:
--------------------------------------------------------------------------------
1 | package zhao.io.ex;
2 |
3 | import java.io.IOException;
4 | import java.io.PrintWriter;
5 | import java.io.StringWriter;
6 |
7 | /**
8 | * DataTear exception class, which includes a constructor that prints the exception stack trace to the log
9 | */
10 | public abstract class DataTearException extends NullPointerException {
11 |
12 | public DataTearException() {
13 | }
14 |
15 | public DataTearException(String errorStr) {
16 | super(errorStr);
17 | }
18 |
19 | /**
20 | * Writes the error stack trace to the corresponding logger
21 | *
22 | * @param dataTearException the error exception
23 | * @param logger the logger to write to
24 | */
25 | public void LoggerToFile(Throwable dataTearException, org.slf4j.Logger logger) {
26 | StringWriter sw = null;
27 | PrintWriter pw = null;
28 | try {
29 | sw = new StringWriter();
30 | pw = new PrintWriter(sw);
31 | dataTearException.printStackTrace(pw);
32 | pw.flush();
33 | sw.flush();
34 | } finally {
35 | if (sw != null) {
36 | try {
37 | sw.close();
38 | } catch (IOException e1) {
39 | e1.printStackTrace();
40 | }
41 | }
42 | if (pw != null) {
43 | pw.close();
44 | }
45 | }
46 | logger.error(sw.toString());
47 | }
48 | }
49 |
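A sketch of the intended pattern for subclasses, mirroring AnalysisMetadataException above: passing a logger lets the constructor persist the full stack trace. The class and logger names here are hypothetical:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DemoTearException extends DataTearException {  // hypothetical subclass for illustration
    private static final Logger LOG = LoggerFactory.getLogger(DemoTearException.class);

    public DemoTearException(String errorStr) {
        super(errorStr);
        LoggerToFile(this, LOG);  // writes the stack trace of this exception into the log
    }
}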
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/ex/NOCharCode.java:
--------------------------------------------------------------------------------
1 | package zhao.io.ex;
2 |
3 | /**
4 | * Exception for an incorrectly configured character encoding set
5 | */
6 | public class NOCharCode extends DataTearException {
7 | public NOCharCode() {
8 | super();
9 | }
10 |
11 | public NOCharCode(String s) {
12 | super(s);
13 | }
14 |
15 | public NOCharCode(String errorStr, org.slf4j.Logger logger) {
16 | super(errorStr);
17 | super.LoggerToFile(this, logger);
18 | }
19 |
20 | }
21 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/ex/OutUDFException.java:
--------------------------------------------------------------------------------
1 | package zhao.io.ex;
2 |
3 | public class OutUDFException extends DataTearException {
4 | public OutUDFException(String message) {
5 | super("自定义数据输出流异常!!!");
6 | }
7 |
8 | public OutUDFException(String errorStr, org.slf4j.Logger logger) {
9 | super(errorStr);
10 | super.LoggerToFile(this, logger);
11 | }
12 | }
13 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/ex/ZHAOLackOfInformation.java:
--------------------------------------------------------------------------------
1 | package zhao.io.ex;
2 |
3 | /**
4 | * API call exception
5 | */
6 | public class ZHAOLackOfInformation extends DataTearException {
7 |
8 | public ZHAOLackOfInformation(String s) {
9 | super(s);
10 | }
11 |
12 | public ZHAOLackOfInformation(String errorStr, org.slf4j.Logger logger) {
13 | super(errorStr);
14 | super.LoggerToFile(this, logger);
15 | }
16 |
17 | @Override
18 | public String getMessage() {
19 | return super.getMessage() + "您的API调用设置不全哦!!";
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/runCli/README.md:
--------------------------------------------------------------------------------
1 | # runCli directory structure
2 |
3 | DataTear has a built-in client, which is optional. DataTear is mainly a framework, and the client is provided for reference
4 | only.
5 |
6 | ### [MAINCli](https://github.com/BeardedManZhao/dataTear/blob/core/src_code/src/main/java/zhao/runCli/MAINCli.java)
7 |
8 | DataTear contains an implemented client, and the client's startup class is this class.
9 |
10 | ### [directive](https://github.com/BeardedManZhao/dataTear/tree/core/src_code/src/main/java/zhao/runCli/directive)
11 |
12 | DataTear contains an implemented client. All of the client's command-processing modules are in this package. At
13 | present there are two modules, which read and write DataTear data respectively. Both modules implement the same
14 | interface.
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/runCli/directive/Execute.java:
--------------------------------------------------------------------------------
1 | package zhao.runCli.directive;
2 |
3 | import java.util.TreeMap;
4 |
5 | /**
6 | * Interface for command execution classes; the client's command extension interface
7 | */
8 | public interface Execute {
9 | /**
10 | * Map of command executors; the key is generally the command keyword and the value is the command executor
11 | */
12 | TreeMap<String, Execute> executeMap = new TreeMap<>();
13 | }
14 |
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/config/ExtClasspathLoader.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.config;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.lang.reflect.Method;
6 | import java.net.URL;
7 | import java.net.URLClassLoader;
8 |
9 | /**
10 | * introduce
11 | *
12 | * 中文
13 | * 根据properties中配置的路径把jar和配置文件加载到classpath中。
14 | * 此工具类加载类时使用的是SystemClassLoader,如有需要对加载类进行校验,请另外实现自己的加载器
15 | * English
16 | * Load the jar and configuration file into the classpath according to the path configured in properties.
17 | * This tool class uses SystemClassLoader when loading classes. If you need to verify the loaded class, please implement your own loader.
18 | */
19 | public class ExtClasspathLoader {
20 | // private static final Logger LOG = LoggerFactory.getLogger(ExtClasspathLoader.class);
21 |
22 | private static final String JAR_SUFFIX = ".jar";
23 | private static final String ZIP_SUFFIX = ".zip";
24 |
25 | /**
26 | * The addURL method of URLClassLoader
27 | */
28 | private static final Method addURL = initAddMethod();
29 |
30 | /**
31 | * Application Classloader
32 | */
33 | private static final URLClassLoader classloader = (URLClassLoader) ClassLoader.getSystemClassLoader();
34 |
35 | /**
36 | * Initializes the addURL method.
37 | *
38 | * @return a Method object granting access to the addURL method
39 | */
40 | private static Method initAddMethod() {
41 | try {
42 | Method add = URLClassLoader.class.getDeclaredMethod("addURL", URL.class);
43 | add.setAccessible(true);
44 | return add;
45 | } catch (Exception e) {
46 | throw new RuntimeException(e);
47 | }
48 | }
49 |
50 | /**
51 | * Loads a file into the classpath via its file path.
52 | *
53 | * @param file file path
54 | * @throws Exception URL resolution exception for the file
55 | */
56 | private static void addURL(File file) throws Exception {
57 | addURL.invoke(classloader, file.toURI().toURL());
58 | }
59 |
60 | /**
61 | * load Resource by Dir
62 | *
63 | * @param file dir
64 | * @throws Exception exception while loading class plug-in files
65 | */
66 | public static void loadResource(File file) throws Exception {
67 | // resource files: only the directory path is loaded
68 | System.out.println("load Resource of dir : " + file.getAbsolutePath());
69 | if (file.isDirectory()) {
70 | addURL(file);
71 | File[] subFiles = file.listFiles();
72 | if (subFiles != null) {
73 | for (File tmp : subFiles) {
74 | loadResource(tmp);
75 | }
76 | }
77 | }
78 | }
79 |
80 | /**
81 | * load Classpath by Dir
82 | *
83 | * @param file directory containing jar plug-ins
84 | * @throws Exception thrown while loading the class plug-in jar packages
85 | */
86 | public static void loadClasspath(File file) throws Exception {
87 | // System.out.println("* >>> load Classpath of dir : " + file.getAbsolutePath());
88 | if (file.isDirectory()) {
89 | File[] subFiles = file.listFiles();
90 | if (subFiles != null) {
91 | for (File subFile : subFiles) {
92 | loadClasspath(subFile);
93 | }
94 | }
95 | } else {
96 | if (file.getAbsolutePath().endsWith(JAR_SUFFIX) || file.getAbsolutePath().endsWith(ZIP_SUFFIX)) {
97 | addURL(file);
98 | }
99 | }
100 | }
101 | }
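A startup sketch for the loader above; the directory names are illustrative (this project keeps plug-in jars and configuration in dedicated directories, but any layout works):

import java.io.File;

public class LoaderDemo {
    public static void main(String[] args) throws Exception {
        // pull every .jar/.zip under the plug-in directory into the system classpath
        ExtClasspathLoader.loadClasspath(new File("lib/jars"));
        // register the configuration directory itself as a resource root
        ExtClasspathLoader.loadResource(new File("conf"));
    }
}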
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/config/Init_Plug_in.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.config;
2 |
3 | /**
4 | * introduce
5 | *
6 | * 中文
7 | * 初始化插件接口,在自带的客户端 MAINCli 类启动的时候,会调用初始化的所有插件,如果您只需要调用API,那么您不需要关注这个接口,因为它只在MAINCli中被使用
8 | *
9 | * English
10 | * Initialize the plug-in interface. When the built-in client MAINCli class starts, all the initialized plug-ins will be called.
11 | * If you only need to call the API, then you do not need to pay attention to this interface, because it is only used in MAINCli
12 | */
13 | public interface Init_Plug_in {
14 | /**
15 | * @return the name of this plug-in
16 | */
17 | String getName();
18 |
19 | /**
20 | * @return whether execution is allowed to continue
21 | */
22 | boolean run();
23 | }
24 |
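A minimal plug-in sketch against the interface above; the class name and behavior are hypothetical:

public class DemoPlugin implements Init_Plug_in {  // hypothetical plug-in
    @Override
    public String getName() {
        return "DemoPlugin";
    }

    @Override
    public boolean run() {
        // returning false aborts the MAINCli startup sequence
        return true;
    }
}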
--------------------------------------------------------------------------------
/src_code/src/main/java/zhao/io/dataTear/config/LoadClass.java:
--------------------------------------------------------------------------------
1 | package zhao.io.dataTear.config;
2 |
3 | import java.io.File;
4 | import java.util.ArrayList;
5 |
6 | public class LoadClass {
7 | static final ArrayList<Init_Plug_in> objectArrayList = new ArrayList<>();