├── LICENSE.txt
├── README
├── pom.xml
└── src
├── main
└── java
│ └── org
│ └── msgpack
│ └── hadoop
│ ├── hive
│ ├── MessagePackStorageHandler.java
│ ├── serde2
│ │ ├── MessagePackSerDe.java
│ │ └── lazy
│ │ │ └── LazyMessagePackRow.java
│ └── udf
│ │ ├── GenericUDTFMessagePackArray.java
│ │ └── GenericUDTFMessagePackMap.java
│ ├── io
│ └── MessagePackWritable.java
│ ├── mapred
│ ├── MessagePackInputFormat.java
│ ├── MessagePackOutputFormat.java
│ ├── MessagePackRecordReader.java
│ └── MessagePackRecordWriter.java
│ └── mapreduce
│ ├── input
│ ├── MessagePackInputFormat.java
│ └── MessagePackRecordReader.java
│ └── output
│ ├── MessagePackOutputFormat.java
│ └── MessagePackRecordWriter.java
└── test
└── java
└── org
└── msgpack
└── hadoop
├── hive
└── serde2
│ └── TestMessagePackSerDe.java
├── io
└── TestMessagePackWritable.java
└── mapreduce
└── input
└── TestMessagePackInputFormat.java
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | MessagePack-Hadoop Integration
2 | ========================================
3 |
4 | This package contains the bridge layer between MessagePack (http://msgpack.org)
5 | and Hadoop (http://hadoop.apache.org/) families.
6 |
7 | This enables you to run MapReduce jobs on MessagePack-formatted data, and also
8 | enables you to run Hive queries over it.
9 |
10 | The MessagePack-Hive adapter enables SQL-based ad hoc queries that take *nested*,
11 | *unstructured* data as input (like JSON, but binary-encoded). Of course, the
12 | queries are executed with the MapReduce framework!
13 |
14 | Here is the sample MessagePack-Hive query, which counts unique user per URL.
15 |
16 | > CREATE EXTERNAL TABLE IF NOT EXISTS mpbin (v string) \
17 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\n' \
18 | LOCATION '/path/to/hdfs/';
19 |
20 | > SELECT url, COUNT(1) \
21 | FROM mpbin LATERAL VIEW msgpack_map(v, 'user', 'url') m AS user, url \
22 | GROUP BY url;
23 |
24 | Required Setup
25 | ========================================
26 |
27 | Please setup Hadoop + Hive system. Either Local, Pseudo-Distributed, or
28 | Distributed environment is OK.
29 |
30 | Hive Getting Started
31 | ========================================
32 |
33 | 1. locate jars
34 |
35 | Put these jars to $HIVE_HOME/lib/ directory.
36 |
37 | * msgpack-hadoop-$version.jar
38 | * msgpack-$version.jar
39 | * javassist-$version.jar
40 |
41 | 2. exec hive shell
42 |
43 | Please execute the following command.
44 |
45 | $ hive --auxpath $HIVE_HOME/lib/msgpack-hadoop-$version.jar,$HIVE_HOME/lib/msgpack-$version.jar,$HIVE_HOME/lib/javassist-$version.jar
46 |
47 | You can skip the --auxpath option once you add the following property to your hive-site.xml.
48 |
49 | <property>
50 |   <name>hive.aux.jars.path</name>
51 |   <value>$HIVE_HOME/lib/msgpack-hadoop-$version.jar,$HIVE_HOME/lib/msgpack-$version.jar,$HIVE_HOME/lib/javassist-$version.jar</value>
52 | </property>
53 |
54 | 3. add jar and load custom UDTF function
55 |
56 | This step is required for every Hive query.
57 |
58 | hive> add jar $HIVE_HOME/lib/msgpack-hadoop-$version.jar;
59 | hive> add jar $HIVE_HOME/lib/msgpack-$version.jar;
60 | hive> add jar $HIVE_HOME/lib/javassist-$version.jar;
61 | hive> CREATE TEMPORARY FUNCTION msgpack_map AS 'org.msgpack.hadoop.hive.udf.GenericUDTFMessagePackMap';
62 |
63 | 4. create external table
64 |
65 | Create an external table that points to the data directory.
66 |
67 | hive> CREATE EXTERNAL TABLE IF NOT EXISTS mp_table (v string) \
68 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '@' LINES TERMINATED BY '\n' \
69 | LOCATION '/path/to/hdfs/';
70 |
71 | 5. execute the query
72 |
73 | Finally, execute the SELECT query over input data.
74 |
75 | The input msgpack data is unstructured, nested data. Therefore, you need to "map"
76 | the MessagePack structure to Hive field names. You can map the fields by
77 | using the msgpack_map() UDTF and name them with the "AS" clause.
78 |
79 | hive> SELECT url, COUNT(1) \
80 | FROM mp_table LATERAL VIEW msgpack_map(v, 'user', 'url') m AS user, url \
81 | GROUP BY url;
82 |
83 | Caveats
84 | ========================================
85 |
86 | Currently, MessagePackInputFormat is not splittable. Therefore, you need to
87 | manually *shred* the data into small pieces.
88 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | org.msgpack.hadoop
4 | msgpack-hadoop
5 | 0.1.0
6 | MessagePack-Hadoop Integration
7 |
8 | MessagePack-Hadoop Integration
9 | http://msgpack.org/
10 |
11 |
12 |
13 | The Apache Software License, Version 2.0
14 | http://www.apache.org/licenses/LICENSE-2.0.txt
15 | repo
16 |
17 |
18 |
19 |
20 | scm:git:git://github.com/msgpack/msgpack-hadoop.git
21 | scm:git:git://github.com/msgpack/msgpack-hadoop.git
22 |
23 |
24 |
25 | 0.5.2-devel
26 | 0.20.2
27 | 0.8.0-SNAPSHOT
28 |
29 |
30 |
31 |
32 | junit
33 | junit
34 | 4.8.1
35 | test
36 |
37 |
38 | org.slf4j
39 | slf4j-api
40 | 1.4.3
41 |
42 |
43 | org.slf4j
44 | slf4j-log4j12
45 | 1.4.3
46 |
47 |
48 | commons-logging
49 | commons-logging
50 | 1.0.4
51 |
52 |
53 | org.msgpack
54 | msgpack
55 | ${msgpack.version}
56 |
57 |
58 | org.apache.hadoop
59 | hadoop-core
60 | ${hadoop.version}
61 |
62 |
63 | hsqldb
64 | hsqldb
65 |
66 |
67 | net.sf.kosmosfs
68 | kfs
69 |
70 |
71 | org.eclipse.jdt
72 | core
73 |
74 |
75 | net.java.dev.jets3t
76 | jets3t
77 |
78 |
79 | oro
80 | oro
81 |
82 |
83 |
84 |
85 | org.apache.hive
86 | common
87 | ${hive.version}
88 |
89 |
90 | org.apache.hive
91 | exec
92 | ${hive.version}
93 |
94 |
95 | org.apache.hive
96 | serde
97 | ${hive.version}
98 |
99 |
100 |
101 |
102 |
103 |
104 | src/main/resources
105 |
106 |
107 |
108 |
109 | src/test/resources
110 |
111 |
112 |
113 |
114 |
115 | maven-compiler-plugin
116 |
117 | 1.5
118 | 1.5
119 |
120 |
121 |
122 |
123 | maven-eclipse-plugin
124 | 2.5.1
125 |
126 |
127 |
128 | maven-release-plugin
129 |
130 |
131 | deploy
132 | scm:git://github.com/msgpack/msgpack-hadoop.git
133 |
134 |
135 |
136 |
137 | org.apache.maven.plugins
138 | maven-source-plugin
139 |
140 |
141 | attach-sources
142 |
143 | jar
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
155 |
156 | org.apache.maven.plugins
157 | maven-javadoc-plugin
158 |
159 | ${project.name} ${project.version} API
160 | true
161 | en_US
162 | UTF-8
163 |
164 |
165 |
166 |
167 | org.apache.maven.plugins
168 | maven-jxr-plugin
169 |
170 |
171 |
172 | org.apache.maven.plugins
173 | maven-surefire-report-plugin
174 |
175 |
176 |
177 |
178 |
179 |
180 | apache release
181 | https://repository.apache.org/content/repositories/releases/
182 |
183 |
184 | msgpack.org
185 | MessagePack Maven2 Repository
186 | http://msgpack.org/maven2/
187 |
188 |
189 | repository.jboss.org
190 | https://repository.jboss.org/nexus/content/groups/public/
191 |
192 | false
193 |
194 |
195 |
196 |
197 | apache.snapshots
198 | Apache Development Snapshot Repository
199 | https://repository.apache.org/content/repositories/snapshots/
200 |
201 | false
202 |
203 |
204 | true
205 |
206 |
207 |
208 |
209 |
210 |
211 | false
212 | msgpack.org
213 | Repository at msgpack.org
214 | file://${project.build.directory}/website/maven2/
215 |
216 |
217 | true
218 | msgpack.org
219 | Repository at msgpack.org
220 | file://${project.build.directory}/website/maven2/
221 |
222 |
234 |
235 |
236 |
237 |
238 | release
239 |
240 |
241 |
242 | true
243 | org.apache.maven.plugins
244 | maven-deploy-plugin
245 | 2.4
246 |
247 | true
248 |
249 |
250 |
251 |
252 |
253 |
254 |
255 |
--------------------------------------------------------------------------------
/src/main/java/org/msgpack/hadoop/hive/MessagePackStorageHandler.java:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack-Hadoop Integration
3 | *
4 | * Copyright (C) 2009-2011 MessagePack Project
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package org.msgpack.hadoop.hive;
20 |
21 | import java.io.IOException;
22 | import java.util.ArrayList;
23 | import java.util.Arrays;
24 | import java.util.HashSet;
25 | import java.util.List;
26 | import java.util.Map;
27 | import java.util.Properties;
28 | import java.util.Set;
29 |
30 | import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
31 | import org.apache.hadoop.hive.serde2.SerDe;
32 | import org.apache.hadoop.hive.serde2.SerDeException;
33 | import org.apache.hadoop.mapred.InputFormat;
34 | import org.apache.hadoop.mapred.OutputFormat;
35 |
36 | import org.msgpack.hadoop.hive.serde2.MessagePackSerDe;
37 | import org.msgpack.hadoop.mapred.MessagePackInputFormat;
38 | import org.msgpack.hadoop.mapred.MessagePackOutputFormat;
39 |
40 | class MessagePackStorageHandler extends DefaultStorageHandler {
41 | @Override
42 | public Class extends InputFormat> getInputFormatClass() {
43 | return MessagePackInputFormat.class;
44 | }
45 |
46 | @Override
47 | public Class extends OutputFormat> getOutputFormatClass() {
48 | return MessagePackOutputFormat.class;
49 | }
50 |
51 | @Override
52 | public Class extends SerDe> getSerDeClass() {
53 | return MessagePackSerDe.class;
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/main/java/org/msgpack/hadoop/hive/serde2/MessagePackSerDe.java:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack-Hadoop Integration
3 | *
4 | * Copyright (C) 2009-2011 MessagePack Project
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package org.msgpack.hadoop.hive.serde2;
20 |
21 | import java.util.ArrayList;
22 | import java.util.Arrays;
23 | import java.util.List;
24 | import java.util.Properties;
25 | import java.io.IOException;
26 | import java.io.DataOutputStream;
27 | import java.io.ByteArrayOutputStream;
28 |
29 | import org.apache.commons.codec.binary.Base64;
30 | import org.apache.commons.logging.Log;
31 | import org.apache.commons.logging.LogFactory;
32 | import org.apache.hadoop.conf.Configuration;
33 | import org.apache.hadoop.hive.serde.Constants;
34 | import org.apache.hadoop.hive.serde2.SerDe;
35 | import org.apache.hadoop.hive.serde2.SerDeException;
36 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
37 | import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
38 | import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
39 | import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
40 | import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
41 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
42 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
43 | import org.apache.hadoop.io.Text;
44 | import org.apache.hadoop.io.Writable;
45 |
46 | import org.msgpack.hadoop.io.MessagePackWritable;
47 | import org.msgpack.hadoop.hive.serde2.lazy.LazyMessagePackRow;
48 |
49 | public class MessagePackSerDe implements SerDe {
50 | private static final Log LOG = LogFactory.getLog(MessagePackSerDe.class.getName());
51 |
52 | private SerDeParameters serdeParams_;
53 | private ObjectInspector cachedObjectInspector_;
54 | private LazyMessagePackRow cachedMessagePackRow_;
55 |
56 | @Override
57 | public void initialize(Configuration conf, Properties tbl) throws SerDeException {
58 | String serdeName = getClass().getName();
59 | serdeParams_ = LazySimpleSerDe.initSerdeParams(conf, tbl, serdeName);
60 |
61 | cachedObjectInspector_ = LazyFactory.createLazyStructInspector(
62 | serdeParams_.getColumnNames(),
63 | serdeParams_.getColumnTypes(),
64 | serdeParams_.getSeparators(),
65 | serdeParams_.getNullSequence(),
66 | serdeParams_.isLastColumnTakesRest(),
67 | serdeParams_.isEscaped(),
68 | serdeParams_.getEscapeChar());
69 |
70 | cachedMessagePackRow_ = new LazyMessagePackRow((LazySimpleStructObjectInspector)cachedObjectInspector_);
71 | }
72 |
73 | @Override
74 | public ObjectInspector getObjectInspector() throws SerDeException {
75 | return cachedObjectInspector_;
76 | }
77 |
78 | @Override
79 | public Object deserialize(Writable blob) throws SerDeException {
80 | if (!(blob instanceof MessagePackWritable)) {
81 | throw new SerDeException(getClass().toString()
82 | + ": expects either MessagePackWritable object!");
83 | }
84 | cachedMessagePackRow_.init((MessagePackWritable)blob);
85 | return cachedMessagePackRow_;
86 | }
87 |
88 | @Override
89 | public Class extends Writable> getSerializedClass() {
90 | return MessagePackWritable.class;
91 | }
92 |
93 | @Override
94 | public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
95 | LOG.info(obj.toString());
96 | LOG.info(objInspector.toString());
97 | return null;
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/src/main/java/org/msgpack/hadoop/hive/serde2/lazy/LazyMessagePackRow.java:
--------------------------------------------------------------------------------
1 | /*
2 | * MessagePack-Hadoop Integration
3 | *
4 | * Copyright (C) 2009-2011 MessagePack Project
5 | *
6 | * Licensed under the Apache License, Version 2.0 (the "License");
7 | * you may not use this file except in compliance with the License.
8 | * You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package org.msgpack.hadoop.hive.serde2.lazy;
20 |
21 | import java.util.ArrayList;
22 | import java.util.Arrays;
23 | import java.util.List;
24 | import java.util.Properties;
25 | import java.io.IOException;
26 | import java.io.DataOutputStream;
27 | import java.io.ByteArrayOutputStream;
28 |
29 | import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
30 | import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
31 | import org.apache.hadoop.hive.serde2.lazy.LazyObject;
32 | import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
33 | import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
34 | import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
35 | import org.apache.hadoop.hive.serde2.objectinspector.StructField;
36 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
37 |
38 | import org.msgpack.hadoop.io.MessagePackWritable;
39 |
40 | public class LazyMessagePackRow extends LazyStruct {
41 | private MessagePackWritable result_;
42 | private ArrayList