├── .gitignore
├── README.md
├── pom.xml
└── src
    ├── main
    │   └── java
    │       └── com
    │           └── esri
    │               ├── dbf
    │               │   ├── DBFField.java
    │               │   ├── DBFHeader.java
    │               │   ├── DBFReader.java
    │               │   └── DBFType.java
    │               ├── io
    │               │   ├── Attributes.java
    │               │   ├── PointFeatureWritable.java
    │               │   ├── PointWritable.java
    │               │   ├── PolygonFeatureWritable.java
    │               │   ├── PolygonWritable.java
    │               │   └── PolylineMWritable.java
    │               ├── mapred
    │               │   ├── AbstractFeatureReader.java
    │               │   ├── AbstractInputFormat.java
    │               │   ├── AbstractReader.java
    │               │   ├── DBFInputFormat.java
    │               │   ├── DBFRecordReader.java
    │               │   ├── PointFeatureInputFormat.java
    │               │   ├── PointInputFormat.java
    │               │   ├── PolygonFeatureInputFormat.java
    │               │   └── PolygonInputFormat.java
    │               ├── mapreduce
    │               │   ├── AbstractFeatureReader.java
    │               │   ├── AbstractInputFormat.java
    │               │   ├── AbstractReader.java
    │               │   ├── DBFInputFormat.java
    │               │   ├── DBFRecordReader.java
    │               │   ├── PointFeatureInputFormat.java
    │               │   ├── PointInputFormat.java
    │               │   ├── PolygonFeatureInputFormat.java
    │               │   └── PolygonInputFormat.java
    │               └── shp
    │                   ├── ShpHeader.java
    │                   ├── ShpReader.java
    │                   └── ShxReader.java
    └── test
        ├── java
        │   └── com
        │       └── esri
        │           ├── dbf
        │           │   └── DBFTest.java
        │           ├── io
        │           │   ├── PointFeatureWritableTest.java
        │           │   ├── PointWritableTest.java
        │           │   ├── PolygonFeatureWritableTest.java
        │           │   ├── PolygonWritableTest.java
        │           │   └── PolylineMWritableTest.java
        │           ├── mapred
        │           │   ├── DBFReaderTest.java
        │           │   ├── MapredFS.java
        │           │   ├── PointFeatureReaderTest.java
        │           │   ├── PointReaderTest.java
        │           │   ├── PolygonFeatureReaderTest.java
        │           │   └── PolygonReaderTest.java
        │           ├── mapreduce
        │           │   ├── DBFReaderTest.java
        │           │   ├── MapreduceFS.java
        │           │   ├── PointFeatureReaderTest.java
        │           │   ├── PointReaderTest.java
        │           │   ├── PolygonFeatureReaderTest.java
        │           │   └── PolygonReaderTest.java
        │           ├── shp
        │           │   ├── PointTest.java
        │           │   ├── PolygonTest.java
        │           │   └── PolylineMTest.java
        │           └── test
        │               └── MiniFS.java
        ├── python
        │   └── CreatePolylineM.py
        └── resources
            ├── log4j.properties
            ├── testpoint.dbf
            ├── testpoint.shp
            ├── testpolygon.dbf
            ├── testpolygon.shp
            └── testpolylinem.shp
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | target
3 |
4 | *.class
5 | *.iml
6 |
7 | # Package Files #
8 | *.jar
9 | *.war
10 | *.ear
11 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Shapefile
2 | =========
3 |
4 | Simple Java library to read point and polygon [shapefiles](http://en.wikipedia.org/wiki/Shapefile).
5 |
6 | In addition, this library contains classes that extend Hadoop `FileInputFormat` and `RecordReader` to enable users to read shapefiles placed in HDFS.
7 | Since shapefiles are relatively "small" compared to a typical 128MB Hadoop block, the `isSplitable` implementation always returns false.
8 | That means only one mapper is used to read the content of a shapefile in HDFS in a MapReduce job.
9 | The [shapefile specification](http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf) describes an [Indexed Sequential Access Method](http://en.wikipedia.org/wiki/ISAM) that could be
10 | used to create splits on a "large" shapefile. This is on the TODO list :-)
11 | When defining the input path to the job, a user can point to a folder or a set of folders containing the `.shp` and/or `.dbf` files.
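The feature input formats list only the `.shp` files as input splits and then look for the matching `.dbf` sitting next to each one. A minimal sketch of that pairing, mirroring the `AbstractFeatureReader` classes further down in this repository (`fileSplit` and `conf` are assumed to be supplied by the enclosing record reader):

```
// Derive the sibling .dbf path from the .shp split, then open both files from HDFS.
final Path shpPath = fileSplit.getPath();
final Path dbfPath = new Path(shpPath.getParent(), shpPath.getName().replace(".shp", ".dbf"));
final FSDataInputStream shpStream = shpPath.getFileSystem(conf).open(shpPath);
final FSDataInputStream dbfStream = dbfPath.getFileSystem(conf).open(dbfPath);
```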
12 |
13 | Input Format               | Writable               | Description
14 | --------------------------|-----------------------|------------
15 | DBFInputFormat             | MapWritable            | Read a DBF file in HDFS
16 | PointInputFormat           | PointWritable          | Read an SHP file in HDFS containing simple 2D point geometry
17 | PolygonInputFormat         | PolygonWritable        | Read an SHP file in HDFS containing simple 2D polygon geometry
18 | PointFeatureInputFormat    | PointFeatureWritable   | Read SHP/DBF files in HDFS containing simple 2D point features
19 | PolygonFeatureInputFormat  | PolygonFeatureWritable | Read SHP/DBF files in HDFS containing simple 2D polygon features
20 |
21 | This library depends on the [Esri Geometry API for Java](https://github.com/Esri/geometry-api-java), which is part of the [GIS Tools For Hadoop](http://esri.github.io/gis-tools-for-hadoop/).
22 |
23 | **Clone or pull the latest version of the geometry API and build/install it before building this project.**
24 |
25 | ## Sample MapReduce Job
26 | ```
27 | public class ShapefileTool extends Configured implements Tool
28 | {
29 |     public static void main(final String[] args) throws Exception
30 |     {
31 |         System.exit(ToolRunner.run(new Configuration(), new ShapefileTool(), args));
32 |     }
33 |
34 |     @Override
35 |     public int run(final String[] args) throws Exception
36 |     {
37 |         final int rc;
38 |         final JobConf jobConf = new JobConf(getConf(), ShapefileTool.class);
39 |
40 |         if (args.length != 2)
41 |         {
42 |             ToolRunner.printGenericCommandUsage(System.err);
43 |             rc = -1;
44 |         }
45 |         else
46 |         {
47 |             jobConf.setJobName(ShapefileTool.class.getSimpleName());
48 |
49 |             jobConf.setMapperClass(PolygonFeatureMap.class);
50 |
51 |             jobConf.setMapOutputKeyClass(NullWritable.class);
52 |             jobConf.setMapOutputValueClass(Writable.class);
53 |
54 |             jobConf.setNumReduceTasks(0);
55 |
56 |             jobConf.setInputFormat(PolygonFeatureInputFormat.class);
57 |             jobConf.setOutputFormat(TextOutputFormat.class);
58 |
59 |             FileInputFormat.setInputPaths(jobConf, new Path(args[0]));
60 |             final Path outputDir = new Path(args[1]);
61 |             outputDir.getFileSystem(jobConf).delete(outputDir, true);
62 |             FileOutputFormat.setOutputPath(jobConf, outputDir);
63 |
64 |             JobClient.runJob(jobConf);
65 |             rc = 0;
66 |         }
67 |         return rc;
68 |     }
69 | }
70 | ```
71 |
72 | The mapper function emits the value of the `CNTRY_NAME` attribute and the centroid of the polygon envelope as a `Text` instance:
73 |
74 | ```
75 | final class PolygonFeatureMap
76 |         extends MapReduceBase
77 |         implements Mapper<LongWritable, PolygonFeatureWritable, NullWritable, Text>
78 | {
79 |     private final static Text NAME = new Text("CNTRY_NAME");
80 |
81 |     private final Text m_text = new Text();
82 |     private final Envelope m_envelope = new Envelope();
83 |
84 |     public void map(
85 |             final LongWritable key,
86 |             final PolygonFeatureWritable val,
87 |             final OutputCollector<NullWritable, Text> collector,
88 |             final Reporter reporter) throws IOException
89 |     {
90 |         val.polygon.queryEnvelope(m_envelope);
91 |         final Point center = m_envelope.getCenter();
92 |         m_text.set(String.format("%.6f %.6f %s",
93 |                 center.getX(), center.getY(), val.attributes.get(NAME).toString()));
94 |         collector.collect(NullWritable.get(), m_text);
95 |     }
96 | }
97 | ```
98 |
99 | ## Sample Shp Usage
100 | ```
101 | final File file = new File("cntry06.shp");
102 | final FileInputStream fileInputStream = new FileInputStream(file);
103 | try
104 | {
105 |     final Envelope envelope = new Envelope();
106 |     final Polygon polygon = new Polygon();
107 |     final ShpReader shpReader = new ShpReader(new DataInputStream(new BufferedInputStream(fileInputStream)));
108 |     while (shpReader.hasMore())
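    // Each pass reads the next polygon record into the preallocated Polygon and prints
    // the center of its envelope; reusing one Polygon instance avoids per-record allocations.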
109 | { 110 | shpReader.queryPolygon(polygon); 111 | polygon.queryEnvelope(envelope); 112 | final Point center = envelope.getCenter(); 113 | System.out.format("%.6f %.6f%n", center.getX(), center.getY()); 114 | } 115 | } 116 | finally 117 | { 118 | fileInputStream.close(); 119 | } 120 | ``` 121 | 122 | ## Sample DBF Usage 123 | ``` 124 | final File file = new File("cntry06.dbf"); 125 | final FileInputStream fileInputStream = new FileInputStream(file); 126 | try 127 | { 128 | final Map map = new HashMap(); 129 | final DBFReader dbfReader = new DBFReader(new DataInputStream(new BufferedInputStream(fileInputStream))); 130 | while (dbfReader.readRecordAsMap(map) != null) 131 | { 132 | System.out.println(map); 133 | } 134 | } 135 | finally 136 | { 137 | fileInputStream.close(); 138 | } 139 | ``` 140 | 141 | ## Sample DBF Field Usage 142 | ``` 143 | final File file = new File("cntry06.dbf"); 144 | final FileInputStream fileInputStream = new FileInputStream(file); 145 | try 146 | { 147 | final DBFReader dbfReader = new DBFReader(new DataInputStream(new BufferedInputStream(fileInputStream))); 148 | 149 | System.out.println(dbfReader.getFields()); 150 | 151 | final int numberOfFields = dbfReader.getNumberOfFields(); 152 | byte dataType = dbfReader.nextDataType(); 153 | while (dataType != DBFType.END) 154 | { 155 | System.out.println("-----"); 156 | for (int i = 0; i < numberOfFields; i++) 157 | { 158 | System.out.println(dbfReader.readFieldWritable(i)); 159 | } 160 | dataType = dbfReader.nextDataType(); 161 | } 162 | } 163 | finally 164 | { 165 | fileInputStream.close(); 166 | } 167 | ``` 168 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.esri 6 | Shapefile 7 | 1.4.2 8 | jar 9 | 10 | Shapefile 11 | 12 | 13 | UTF-8 14 | 2.6.0-cdh5.5.0 15 | 16 | 17 | 18 | 19 | Cloudera repository 20 | https://repository.cloudera.com/artifactory/cloudera-repos/ 21 | 22 | false 23 | 24 | 25 | 26 | Akka repository 27 | http://repo.akka.io/releases 28 | 29 | false 30 | 31 | 32 | 33 | 34 | 35 | 36 | commons-io 37 | commons-io 38 | 2.4 39 | 40 | 41 | com.esri.geometry 42 | esri-geometry-api 43 | 1.2.1 44 | 45 | 46 | org.apache.hadoop 47 | hadoop-client 48 | ${hadoop.version} 49 | provided 50 | 51 | 52 | org.apache.hadoop 53 | hadoop-hdfs 54 | ${hadoop.version} 55 | provided 56 | 57 | 58 | org.apache.hadoop 59 | hadoop-minicluster 60 | ${hadoop.version} 61 | test 62 | 63 | 64 | log4j 65 | log4j 66 | 1.2.17 67 | test 68 | 69 | 70 | junit 71 | junit 72 | 4.11 73 | test 74 | 75 | 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-compiler-plugin 81 | 3.5.1 82 | 83 | 1.6 84 | 1.6 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /src/main/java/com/esri/dbf/DBFField.java: -------------------------------------------------------------------------------- 1 | package com.esri.dbf; 2 | 3 | import org.apache.hadoop.io.BooleanWritable; 4 | import org.apache.hadoop.io.DoubleWritable; 5 | import org.apache.hadoop.io.FloatWritable; 6 | import org.apache.hadoop.io.IntWritable; 7 | import org.apache.hadoop.io.LongWritable; 8 | import org.apache.hadoop.io.NullWritable; 9 | import org.apache.hadoop.io.Text; 10 | import org.apache.hadoop.io.Writable; 11 | 12 | import java.io.DataInput; 13 | import java.io.DataInputStream; 14 | import java.io.IOException; 15 | import java.io.Serializable; 16 | import java.util.GregorianCalendar; 
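/**
 * Describes one column of a DBF table. {@link #read} parses the 32-byte field descriptor
 * (field name in bytes 0-10, type code at byte 11, field length at byte 16, decimal count
 * at byte 17), while readValue/readWritable decode a single cell of that column from the
 * record stream using the declared type and length.
 */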
17 | 18 | public class DBFField implements Serializable 19 | { 20 | 21 | public static final int TERMINATOR = 0x0d; 22 | 23 | public String fieldName; /* 0-10 */ 24 | public byte dataType; /* 11 */ 25 | private int reserved1; /* 12-15 */ 26 | public int fieldLength; /* 16 */ 27 | public byte decimalCount; /* 17 */ 28 | private short reserved2; /* 18-19 */ 29 | private byte workAreaId; /* 20 */ 30 | private short reserved3; /* 21-22 */ 31 | private byte setFieldsFlag; /* 23 */ 32 | private byte[] reserved4 = new byte[7]; /* 24-30 */ 33 | private byte indexFieldFlag; /* 31 */ 34 | 35 | private DBFField() 36 | { 37 | } 38 | 39 | public static DBFField read(final DataInput in) throws IOException 40 | { 41 | final DBFField field = new DBFField(); 42 | 43 | final byte firstByte = in.readByte(); /* 0 */ 44 | if (firstByte == TERMINATOR) 45 | { 46 | return null; 47 | } 48 | 49 | final byte[] bytes = new byte[11]; /* 1-10 */ 50 | in.readFully(bytes, 1, 10); 51 | bytes[0] = firstByte; 52 | 53 | int nonZeroIndex = bytes.length - 1; 54 | while (nonZeroIndex >= 0 && bytes[nonZeroIndex] == 0) 55 | { 56 | nonZeroIndex--; 57 | } 58 | field.fieldName = new String(bytes, 0, nonZeroIndex + 1); 59 | 60 | field.dataType = in.readByte(); /* 11 */ 61 | field.reserved1 = in.readInt();// DbfUtils.readLittleEndianInt(in); /* 12-15 */ 62 | field.fieldLength = in.readUnsignedByte(); /* 16 */ 63 | field.decimalCount = in.readByte(); /* 17 */ 64 | field.reserved2 = in.readShort(); // DbfUtils.readLittleEndianShort(in); /* 18-19 */ 65 | field.workAreaId = in.readByte(); /* 20 */ 66 | field.reserved3 = in.readShort();// DbfUtils.readLittleEndianShort(in); /* 21-22 */ 67 | field.setFieldsFlag = in.readByte(); /* 23 */ 68 | in.readFully(field.reserved4); /* 24-30 */ 69 | field.indexFieldFlag = in.readByte(); /* 31 */ 70 | 71 | return field; 72 | } 73 | 74 | public Object readValue(final DataInputStream dataInputStream) throws IOException 75 | { 76 | final byte bytes[] = new byte[fieldLength]; 77 | dataInputStream.readFully(bytes); 78 | 79 | switch (dataType) 80 | { 81 | case 'C': 82 | return new String(bytes).trim(); 83 | case 'D': 84 | return readTimeInMillis(bytes); 85 | case 'F': 86 | return readFloat(bytes); 87 | case 'L': 88 | return readLogical(bytes); 89 | case 'N': 90 | if (decimalCount == 0) 91 | { 92 | if (fieldLength < 5) 93 | { 94 | return readShort(bytes); 95 | } 96 | if (fieldLength < 8) 97 | { 98 | return readInteger(bytes); 99 | } 100 | return readLong(bytes); 101 | } 102 | else 103 | { 104 | return readDouble(bytes); 105 | } 106 | default: 107 | return null; 108 | } 109 | } 110 | 111 | public Writable readWritable(final DataInputStream dataInputStream) throws IOException 112 | { 113 | final byte bytes[] = new byte[fieldLength]; 114 | dataInputStream.readFully(bytes); 115 | 116 | switch (dataType) 117 | { 118 | case 'C': 119 | return new Text(bytes); 120 | case 'D': 121 | return new LongWritable(readTimeInMillis(bytes)); 122 | case 'F': 123 | return new FloatWritable(readFloat(bytes)); 124 | case 'L': 125 | return new BooleanWritable(readLogical(bytes)); 126 | case 'N': 127 | if (decimalCount == 0) 128 | { 129 | if (fieldLength < 8) 130 | { 131 | return new IntWritable(readInteger(bytes)); 132 | } 133 | return new LongWritable(readLong(bytes)); 134 | } 135 | else 136 | { 137 | return new DoubleWritable(readDouble(bytes)); 138 | } 139 | default: 140 | return NullWritable.get(); 141 | } 142 | } 143 | 144 | private int parseInt( 145 | final byte[] bytes, 146 | final int from, 147 | final int to) 148 | { 149 | 
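        // Accumulate base-10 ASCII digits from bytes[from, to); the numeric readers strip
        // leading blanks via trimSpaces before delegating here, so only digit bytes are expected.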
int result = 0; 150 | for (int i = from; i < to && i < bytes.length; i++) 151 | { 152 | result *= 10; 153 | result += bytes[i] - '0'; 154 | } 155 | return result; 156 | } 157 | 158 | private short parseShort( 159 | final byte[] bytes, 160 | final int from, 161 | final int to) 162 | { 163 | short result = 0; 164 | for (int i = from; i < to && i < bytes.length; i++) 165 | { 166 | result *= 10; 167 | result += bytes[i] - '0'; 168 | } 169 | return result; 170 | } 171 | 172 | private long parseLong( 173 | final byte[] bytes, 174 | final int from, 175 | final int to) 176 | { 177 | long result = 0L; 178 | for (int i = from; i < to && i < bytes.length; i++) 179 | { 180 | result *= 10L; 181 | result += bytes[i] - '0'; 182 | } 183 | return result; 184 | } 185 | 186 | private int trimSpaces(final byte[] bytes) 187 | { 188 | int i = 0, l = bytes.length; 189 | while (i < l) 190 | { 191 | if (bytes[i] != ' ') 192 | { 193 | break; 194 | } 195 | i++; 196 | } 197 | return i; 198 | } 199 | 200 | private long readTimeInMillis(final byte[] bytes) throws IOException 201 | { 202 | int year = parseInt(bytes, 0, 4); 203 | int month = parseInt(bytes, 4, 6); 204 | int day = parseInt(bytes, 6, 8); 205 | return new GregorianCalendar(year, month - 1, day).getTimeInMillis(); 206 | } 207 | 208 | private boolean readLogical(final byte[] bytes) throws IOException 209 | { 210 | return bytes[0] == 'Y' || bytes[0] == 'y' || bytes[0] == 'T' || bytes[0] == 't'; 211 | } 212 | 213 | private short readShort(final byte[] bytes) throws IOException 214 | { 215 | final int index = trimSpaces(bytes); 216 | final int length = bytes.length - index; 217 | if (length == 0 || bytes[index] == '?') 218 | { 219 | return 0; 220 | } 221 | return parseShort(bytes, index, bytes.length); 222 | } 223 | 224 | private int readInteger(final byte[] bytes) throws IOException 225 | { 226 | final int index = trimSpaces(bytes); 227 | final int length = bytes.length - index; 228 | if (length == 0 || bytes[index] == '?') 229 | { 230 | return 0; 231 | } 232 | return parseInt(bytes, index, bytes.length); 233 | } 234 | 235 | private long readLong(final byte[] bytes) throws IOException 236 | { 237 | final int index = trimSpaces(bytes); 238 | final int length = bytes.length - index; 239 | if (length == 0 || bytes[index] == '?') 240 | { 241 | return 0L; 242 | } 243 | return parseLong(bytes, index, bytes.length); 244 | } 245 | 246 | private float readFloat(final byte[] bytes) throws IOException 247 | { 248 | final int index = trimSpaces(bytes); 249 | final int length = bytes.length - index; 250 | if (length == 0 || bytes[index] == '?') 251 | { 252 | return 0.0F; 253 | } 254 | // TODO - inline float reader 255 | return Float.parseFloat(new String(bytes, index, length)); 256 | } 257 | 258 | private double readDouble(final byte[] bytes) throws IOException 259 | { 260 | final int index = trimSpaces(bytes); 261 | final int length = bytes.length - index; 262 | if (length == 0 || bytes[index] == '?') 263 | { 264 | return 0.0; 265 | } 266 | // TODO - inline double reader 267 | return Double.parseDouble(new String(bytes, index, length)); 268 | } 269 | 270 | @Override 271 | public String toString() 272 | { 273 | final char c = (char) dataType; 274 | final StringBuilder sb = new StringBuilder("DBFField{"); 275 | sb.append("fieldName='") 276 | .append(fieldName) 277 | .append('\''); 278 | sb.append(", dataType='") 279 | .append(c) 280 | .append('\''); 281 | sb.append(", fieldLength=") 282 | .append(fieldLength); 283 | sb.append(", decimalCount=") 284 | .append(decimalCount); 
285 | sb.append('}'); 286 | return sb.toString(); 287 | } 288 | } 289 | -------------------------------------------------------------------------------- /src/main/java/com/esri/dbf/DBFHeader.java: -------------------------------------------------------------------------------- 1 | package com.esri.dbf; 2 | 3 | import org.apache.commons.io.EndianUtils; 4 | 5 | import java.io.DataInputStream; 6 | import java.io.IOException; 7 | import java.io.Serializable; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | /** 12 | */ 13 | public class DBFHeader implements Serializable 14 | { 15 | 16 | private byte signature; /* 0 */ 17 | public byte year; /* 1 */ 18 | public byte month; /* 2 */ 19 | public byte day; /* 3 */ 20 | public int numberOfRecords; /* 4-7 */ 21 | public short headerLength; /* 8-9 */ 22 | public short recordLength; /* 10-11 */ 23 | private short reserved1; /* 12-13 */ 24 | private byte incompleteTransaction; /* 14 */ 25 | private byte encryptionFlag; /* 15 */ 26 | private int freeRecordThread; /* 16-19 */ 27 | private int reserved2; /* 20-23 */ 28 | private int reserved3; /* 24-27 */ 29 | private byte mdxFlag; /* 28 */ 30 | private byte languageDriver; /* 29 */ 31 | private short reserved4; /* 30-31 */ 32 | public List fields; /* each 32 bytes */ 33 | public int numberOfFields; 34 | 35 | public static DBFHeader read(final DataInputStream dataInput) throws IOException 36 | { 37 | final DBFHeader header = new DBFHeader(); 38 | 39 | header.signature = dataInput.readByte(); /* 0 */ 40 | header.year = dataInput.readByte(); /* 1 */ 41 | header.month = dataInput.readByte(); /* 2 */ 42 | header.day = dataInput.readByte(); /* 3 */ 43 | header.numberOfRecords = EndianUtils.readSwappedInteger(dataInput); //DbfUtils.readLittleEndianInt(dataInput); /* 4-7 */ 44 | 45 | header.headerLength = EndianUtils.readSwappedShort(dataInput);//DbfUtils.readLittleEndianShort(dataInput); /* 8-9 */ 46 | header.recordLength = EndianUtils.readSwappedShort(dataInput);//DbfUtils.readLittleEndianShort(dataInput); /* 10-11 */ 47 | 48 | header.reserved1 = dataInput.readShort();//DbfUtils.readLittleEndianShort(dataInput); /* 12-13 */ 49 | header.incompleteTransaction = dataInput.readByte(); /* 14 */ 50 | header.encryptionFlag = dataInput.readByte(); /* 15 */ 51 | header.freeRecordThread = dataInput.readInt();//DbfUtils.readLittleEndianInt(dataInput); /* 16-19 */ 52 | header.reserved2 = dataInput.readInt(); /* 20-23 */ 53 | header.reserved3 = dataInput.readInt(); /* 24-27 */ 54 | header.mdxFlag = dataInput.readByte(); /* 28 */ 55 | header.languageDriver = dataInput.readByte(); /* 29 */ 56 | header.reserved4 = dataInput.readShort();//DbfUtils.readLittleEndianShort(dataInput); /* 30-31 */ 57 | 58 | header.fields = new ArrayList(); 59 | DBFField field; 60 | while ((field = DBFField.read(dataInput)) != null) 61 | { 62 | header.fields.add(field); 63 | } 64 | header.numberOfFields = header.fields.size(); 65 | return header; 66 | } 67 | 68 | public DBFField getField(final int i) 69 | { 70 | return fields.get(i); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/main/java/com/esri/dbf/DBFReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.dbf; 2 | 3 | import org.apache.hadoop.io.Writable; 4 | 5 | import java.io.DataInputStream; 6 | import java.io.IOException; 7 | import java.io.Serializable; 8 | import java.util.ArrayList; 9 | import java.util.HashMap; 10 | import java.util.List; 11 | import 
java.util.Map; 12 | 13 | /** 14 | * Based on https://code.google.com/p/javadbf/ 15 | */ 16 | public class DBFReader implements Serializable 17 | { 18 | private final transient DataInputStream m_dataInputStream; 19 | private final transient DBFHeader m_header; 20 | 21 | public DBFReader(final DataInputStream dataInputStream) throws IOException 22 | { 23 | m_dataInputStream = dataInputStream; 24 | m_header = DBFHeader.read(dataInputStream); 25 | } 26 | 27 | public Map readRecordAsMap(final Map map) throws IOException 28 | { 29 | final byte dataType = nextDataType(); 30 | if (dataType == DBFType.END) 31 | { 32 | return null; 33 | } 34 | for (final DBFField field : m_header.fields) 35 | { 36 | map.put(field.fieldName, field.readValue(m_dataInputStream)); 37 | } 38 | return map; 39 | } 40 | 41 | public Map readRecordAsMap() throws IOException 42 | { 43 | return readRecordAsMap(new HashMap()); 44 | } 45 | 46 | public Object[] createValueArray() 47 | { 48 | return new Object[m_header.numberOfFields]; 49 | } 50 | 51 | private Object[] queryValues(final Object[] values) throws IOException 52 | { 53 | final int numberOfFields = m_header.numberOfFields; 54 | for (int i = 0; i < numberOfFields; i++) 55 | { 56 | values[i] = readFieldValue(i); 57 | } 58 | return values; 59 | } 60 | 61 | public Object[] queryRecord(final Object[] values) throws IOException 62 | { 63 | final byte dataType = nextDataType(); 64 | if (dataType == DBFType.END) 65 | { 66 | return null; 67 | } 68 | return queryValues(values); 69 | } 70 | 71 | public Object[] readRecord() throws IOException 72 | { 73 | final byte dataType = nextDataType(); 74 | if (dataType == DBFType.END) 75 | { 76 | return null; 77 | } 78 | return queryValues(createValueArray()); 79 | } 80 | 81 | public List readValues() throws IOException 82 | { 83 | final List values = new ArrayList(); 84 | final int numberOfFields = m_header.numberOfFields; 85 | for (int i = 0; i < numberOfFields; i++) 86 | { 87 | values.add(readFieldValue(i)); 88 | } 89 | return values; 90 | } 91 | 92 | public List getFields() 93 | { 94 | return m_header.fields; 95 | } 96 | 97 | public int getNumberOfFields() 98 | { 99 | return m_header.numberOfFields; 100 | } 101 | 102 | public int getNumberOfRecords() 103 | { 104 | return m_header.numberOfRecords; 105 | } 106 | 107 | public byte nextDataType() throws IOException 108 | { 109 | byte dataType; 110 | do 111 | { 112 | dataType = m_dataInputStream.readByte(); 113 | if (dataType == DBFType.END) 114 | { 115 | break; 116 | } 117 | else if (dataType == DBFType.DELETED) 118 | { 119 | skipRecord(); 120 | } 121 | } 122 | while (dataType == DBFType.DELETED); 123 | return dataType; 124 | } 125 | 126 | public void skipRecord() throws IOException 127 | { 128 | m_dataInputStream.skipBytes(m_header.recordLength - 1); 129 | } 130 | 131 | public Object readFieldValue(final int index) throws IOException 132 | { 133 | return m_header.getField(index) 134 | .readValue(m_dataInputStream); 135 | } 136 | 137 | public Writable readFieldWritable(final int index) throws IOException 138 | { 139 | return m_header.getField(index) 140 | .readWritable(m_dataInputStream); 141 | } 142 | 143 | } 144 | -------------------------------------------------------------------------------- /src/main/java/com/esri/dbf/DBFType.java: -------------------------------------------------------------------------------- 1 | package com.esri.dbf; 2 | 3 | import java.io.Serializable; 4 | 5 | /** 6 | */ 7 | public final class DBFType implements Serializable 8 | { 9 | 10 | public final static byte 
END = 0x1A; 11 | public final static byte DELETED = 0x2A; 12 | 13 | private DBFType() 14 | { 15 | } 16 | 17 | } -------------------------------------------------------------------------------- /src/main/java/com/esri/io/Attributes.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.MapWritable; 5 | import org.apache.hadoop.io.Text; 6 | import org.apache.hadoop.io.Writable; 7 | 8 | /** 9 | */ 10 | public class Attributes extends MapWritable 11 | { 12 | public String getText(final String key) 13 | { 14 | final Writable writable = get(new Text(key)); 15 | return writable == null ? "" : writable.toString(); 16 | } 17 | 18 | public long getLong(final String key) 19 | { 20 | return getLong(new Text(key)); 21 | } 22 | 23 | public long getLong( 24 | final Text key 25 | ) 26 | { 27 | return getLong(key, 0L); 28 | } 29 | 30 | public long getLong( 31 | final Text key, 32 | final long defaultValue) 33 | { 34 | final long rv; 35 | final Writable writable = get(key); 36 | if (writable instanceof LongWritable) 37 | { 38 | rv = ((LongWritable) writable).get(); 39 | } 40 | else 41 | { 42 | rv = defaultValue; 43 | } 44 | return rv; 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/com/esri/io/PointFeatureWritable.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import java.io.IOException; 6 | 7 | /** 8 | */ 9 | public class PointFeatureWritable extends PointWritable 10 | { 11 | public final Attributes attributes = new Attributes(); 12 | 13 | public PointFeatureWritable() 14 | { 15 | } 16 | 17 | @Override 18 | public void write(final DataOutput dataOutput) throws IOException 19 | { 20 | super.write(dataOutput); 21 | attributes.write(dataOutput); 22 | } 23 | 24 | @Override 25 | public void readFields(final DataInput dataInput) throws IOException 26 | { 27 | super.readFields(dataInput); 28 | attributes.readFields(dataInput); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/com/esri/io/PointWritable.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import com.esri.core.geometry.Point; 4 | import org.apache.hadoop.io.Writable; 5 | 6 | import java.io.DataInput; 7 | import java.io.DataOutput; 8 | import java.io.IOException; 9 | 10 | /** 11 | */ 12 | public class PointWritable implements Writable 13 | { 14 | public Point point; 15 | 16 | public PointWritable() 17 | { 18 | point = new Point(); 19 | } 20 | 21 | @Override 22 | public void write(final DataOutput dataOutput) throws IOException 23 | { 24 | dataOutput.writeDouble(point.getX()); 25 | dataOutput.writeDouble(point.getY()); 26 | } 27 | 28 | @Override 29 | public void readFields(final DataInput dataInput) throws IOException 30 | { 31 | point.setEmpty(); 32 | point.setX(dataInput.readDouble()); 33 | point.setY(dataInput.readDouble()); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/esri/io/PolygonFeatureWritable.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import java.io.DataInput; 4 | import java.io.DataOutput; 5 | import 
java.io.IOException; 6 | 7 | /** 8 | */ 9 | public class PolygonFeatureWritable extends PolygonWritable 10 | { 11 | public final Attributes attributes = new Attributes(); 12 | 13 | public PolygonFeatureWritable() 14 | { 15 | } 16 | 17 | @Override 18 | public void write(final DataOutput dataOutput) throws IOException 19 | { 20 | super.write(dataOutput); 21 | attributes.write(dataOutput); 22 | } 23 | 24 | @Override 25 | public void readFields(final DataInput dataInput) throws IOException 26 | { 27 | super.readFields(dataInput); 28 | attributes.readFields(dataInput); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/com/esri/io/PolygonWritable.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import com.esri.core.geometry.Point2D; 4 | import com.esri.core.geometry.Polygon; 5 | import org.apache.hadoop.io.Writable; 6 | 7 | import java.io.DataInput; 8 | import java.io.DataOutput; 9 | import java.io.IOException; 10 | 11 | /** 12 | */ 13 | public class PolygonWritable 14 | implements Writable 15 | { 16 | public Polygon polygon; 17 | 18 | public PolygonWritable() 19 | { 20 | polygon = new Polygon(); 21 | } 22 | 23 | @Override 24 | public void write(final DataOutput dataOutput) throws IOException 25 | { 26 | final int pathCount = polygon.getPathCount(); 27 | dataOutput.writeInt(pathCount); 28 | for (int i = 0; i < pathCount; i++) 29 | { 30 | dataOutput.writeInt(polygon.getPathSize(i)); 31 | } 32 | final Point2D point2D = new Point2D(); 33 | final int pointCount = polygon.getPointCount(); 34 | for (int i = 0; i < pointCount; i++) 35 | { 36 | polygon.getXY(i, point2D); // Had to patch https://github.com/Esri/geometry-api-java 37 | dataOutput.writeDouble(point2D.x); 38 | dataOutput.writeDouble(point2D.y); 39 | } 40 | } 41 | 42 | @Override 43 | public void readFields(final DataInput dataInput) throws IOException 44 | { 45 | polygon.setEmpty(); 46 | final int pathCount = dataInput.readInt(); 47 | final int[] pathSizes = new int[pathCount]; 48 | for (int p = 0; p < pathCount; p++) 49 | { 50 | pathSizes[p] = dataInput.readInt(); 51 | } 52 | for (final int pathSize : pathSizes) 53 | { 54 | for (int c = 0; c < pathSize; c++) 55 | { 56 | final double x = dataInput.readDouble(); 57 | final double y = dataInput.readDouble(); 58 | if (c > 0) 59 | { 60 | polygon.lineTo(x, y); 61 | } 62 | else 63 | { 64 | polygon.startPath(x, y); 65 | } 66 | } 67 | } 68 | polygon.closeAllPaths(); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/esri/io/PolylineMWritable.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import org.apache.hadoop.io.Writable; 4 | 5 | import java.io.DataInput; 6 | import java.io.DataOutput; 7 | import java.io.IOException; 8 | import java.io.Serializable; 9 | 10 | /** 11 | */ 12 | public class PolylineMWritable implements Writable, Serializable 13 | { 14 | public int[] lens; // The length in each part - lens.length is the number of parts 15 | public double[] x; 16 | public double[] y; 17 | public double[] m; 18 | 19 | @Override 20 | public void write(final DataOutput dataOutput) throws IOException 21 | { 22 | dataOutput.writeInt(lens.length); 23 | for (final int len : lens) 24 | { 25 | dataOutput.writeInt(len); 26 | } 27 | final int length = x.length; 28 | dataOutput.writeInt(length); 29 | for (int l = 0; l < length; l++) 30 | { 31 
| dataOutput.writeDouble(x[l]); 32 | dataOutput.writeDouble(y[l]); 33 | dataOutput.writeDouble(m[l]); 34 | } 35 | } 36 | 37 | @Override 38 | public void readFields(final DataInput dataInput) throws IOException 39 | { 40 | final int plen = dataInput.readInt(); 41 | lens = new int[plen]; 42 | for (int l = 0; l < plen; l++) 43 | { 44 | lens[l] = dataInput.readInt(); 45 | } 46 | final int clen = dataInput.readInt(); 47 | x = new double[clen]; 48 | y = new double[clen]; 49 | m = new double[clen]; 50 | for (int l = 0; l < clen; l++) 51 | { 52 | x[l] = dataInput.readDouble(); 53 | y[l] = dataInput.readDouble(); 54 | m[l] = dataInput.readDouble(); 55 | } 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/AbstractFeatureReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.dbf.DBFField; 4 | import com.esri.dbf.DBFReader; 5 | import org.apache.hadoop.fs.FSDataInputStream; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.io.MapWritable; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.Writable; 10 | import org.apache.hadoop.mapred.FileSplit; 11 | import org.apache.hadoop.mapred.InputSplit; 12 | import org.apache.hadoop.mapred.JobConf; 13 | 14 | import java.io.IOException; 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | 18 | /** 19 | */ 20 | abstract class AbstractFeatureReader 21 | extends AbstractReader 22 | { 23 | protected FSDataInputStream m_dbfStream; 24 | protected DBFReader m_dbfReader; 25 | protected List m_keys; 26 | 27 | public AbstractFeatureReader( 28 | final InputSplit inputSplit, 29 | final JobConf jobConf) throws IOException 30 | { 31 | super(inputSplit, jobConf); 32 | // No need to check if instance of FileSplit as it is done in super class. 
33 | final FileSplit fileSplit = (FileSplit) inputSplit; 34 | // Get .shp file 35 | final Path shpPath = fileSplit.getPath(); 36 | final String dbfName = shpPath.getName().replace(".shp", ".dbf"); 37 | final Path dbfPath = new Path(shpPath.getParent(), dbfName); 38 | m_dbfStream = dbfPath.getFileSystem(jobConf).open(dbfPath); 39 | m_dbfReader = new DBFReader(m_dbfStream); 40 | // Create a list of field name as Hadoop Text instances 41 | final List fields = m_dbfReader.getFields(); 42 | m_keys = new ArrayList(fields.size()); 43 | for (final DBFField field : fields) 44 | { 45 | m_keys.add(new Text(field.fieldName)); 46 | } 47 | } 48 | 49 | protected void putAttributes(final MapWritable attributes) throws IOException 50 | { 51 | final int len = m_dbfReader.getNumberOfFields(); 52 | m_dbfReader.nextDataType(); 53 | for (int i = 0; i < len; i++) 54 | { 55 | attributes.put(m_keys.get(i), m_dbfReader.readFieldWritable(i)); 56 | } 57 | } 58 | 59 | @Override 60 | public void close() throws IOException 61 | { 62 | // Close shp 63 | super.close(); 64 | if (m_dbfStream != null) 65 | { 66 | m_dbfStream.close(); 67 | m_dbfStream = null; 68 | } 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/AbstractInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import org.apache.hadoop.fs.FileStatus; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Writable; 8 | import org.apache.hadoop.mapred.FileInputFormat; 9 | import org.apache.hadoop.mapred.InputSplit; 10 | import org.apache.hadoop.mapred.JobConf; 11 | import org.apache.hadoop.mapred.RecordReader; 12 | import org.apache.hadoop.mapred.Reporter; 13 | 14 | import java.io.IOException; 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | 18 | /** 19 | */ 20 | abstract class AbstractInputFormat 21 | extends FileInputFormat 22 | { 23 | @Override 24 | protected FileStatus[] listStatus(final JobConf job) throws IOException 25 | { 26 | final FileStatus[] orig = super.listStatus(job); 27 | final List list = new ArrayList(orig.length); 28 | for (final FileStatus fileStatus : orig) 29 | { 30 | final String name = fileStatus.getPath().getName().toLowerCase(); 31 | if (name.endsWith(".shp")) 32 | { 33 | list.add(fileStatus); 34 | } 35 | } 36 | final FileStatus[] dest = new FileStatus[list.size()]; 37 | list.toArray(dest); 38 | return dest; 39 | } 40 | 41 | @Override 42 | protected boolean isSplitable( 43 | final FileSystem fs, 44 | final Path path) 45 | { 46 | return false; 47 | } 48 | 49 | @Override 50 | public abstract RecordReader getRecordReader( 51 | final InputSplit inputSplit, 52 | final JobConf jobConf, 53 | final Reporter reporter) throws IOException; 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/AbstractReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.shp.ShpReader; 4 | import org.apache.hadoop.fs.FSDataInputStream; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Writable; 8 | import org.apache.hadoop.mapred.FileSplit; 9 | import org.apache.hadoop.mapred.InputSplit; 10 | import org.apache.hadoop.mapred.JobConf; 11 | import 
org.apache.hadoop.mapred.RecordReader; 12 | 13 | import java.io.IOException; 14 | 15 | /** 16 | */ 17 | abstract class AbstractReader 18 | implements RecordReader 19 | { 20 | protected final LongWritable m_key = new LongWritable(); 21 | protected float m_length; 22 | protected FSDataInputStream m_shpStream; 23 | protected ShpReader m_shpReader; 24 | 25 | public AbstractReader( 26 | final InputSplit inputSplit, 27 | final JobConf jobConf) throws IOException 28 | { 29 | if (inputSplit instanceof FileSplit) 30 | { 31 | final FileSplit fileSplit = (FileSplit) inputSplit; 32 | m_length = fileSplit.getLength(); 33 | final Path path = fileSplit.getPath(); 34 | m_shpStream = path.getFileSystem(jobConf).open(path); 35 | m_shpReader = new ShpReader(m_shpStream); 36 | } 37 | else 38 | { 39 | throw new IOException("Input split is not an instance of FileSplit"); 40 | } 41 | } 42 | 43 | @Override 44 | public LongWritable createKey() 45 | { 46 | return m_key; 47 | } 48 | 49 | @Override 50 | public long getPos() throws IOException 51 | { 52 | return m_shpStream.getPos(); 53 | } 54 | 55 | @Override 56 | public float getProgress() throws IOException 57 | { 58 | return m_shpStream.getPos() / m_length; 59 | } 60 | 61 | @Override 62 | public void close() throws IOException 63 | { 64 | if (m_shpStream != null) 65 | { 66 | m_shpStream.close(); 67 | m_shpStream = null; 68 | } 69 | } 70 | 71 | @Override 72 | public boolean next( 73 | final LongWritable key, 74 | final T value) throws IOException 75 | { 76 | final boolean hasMore = m_shpReader.hasMore(); 77 | if (hasMore) 78 | { 79 | next(); 80 | m_key.set(m_shpReader.recordNumber); 81 | } 82 | return hasMore; 83 | } 84 | 85 | protected abstract void next() throws IOException; 86 | 87 | @Override 88 | public abstract T createValue(); 89 | 90 | } 91 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/DBFInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import org.apache.hadoop.fs.FileStatus; 4 | import org.apache.hadoop.fs.FileSystem; 5 | import org.apache.hadoop.fs.Path; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.MapWritable; 8 | import org.apache.hadoop.mapred.FileInputFormat; 9 | import org.apache.hadoop.mapred.InputSplit; 10 | import org.apache.hadoop.mapred.JobConf; 11 | import org.apache.hadoop.mapred.RecordReader; 12 | import org.apache.hadoop.mapred.Reporter; 13 | 14 | import java.io.IOException; 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | 18 | /** 19 | */ 20 | public class DBFInputFormat extends FileInputFormat 21 | { 22 | @Override 23 | protected FileStatus[] listStatus(final JobConf job) throws IOException 24 | { 25 | final FileStatus[] orig = super.listStatus(job); 26 | final List list = new ArrayList(orig.length); 27 | for (final FileStatus fileStatus : orig) 28 | { 29 | final String name = fileStatus.getPath().getName().toLowerCase(); 30 | if (name.endsWith(".dbf")) 31 | { 32 | list.add(fileStatus); 33 | } 34 | } 35 | final FileStatus[] dest = new FileStatus[list.size()]; 36 | list.toArray(dest); 37 | return dest; 38 | } 39 | 40 | @Override 41 | protected boolean isSplitable( 42 | final FileSystem fs, 43 | final Path path) 44 | { 45 | return false; 46 | } 47 | 48 | @Override 49 | public RecordReader getRecordReader( 50 | final InputSplit inputSplit, 51 | final JobConf jobConf, 52 | final Reporter reporter) throws IOException 53 | { 54 | return new 
DBFRecordReader(inputSplit, jobConf); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/DBFRecordReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.dbf.DBFField; 4 | import com.esri.dbf.DBFReader; 5 | import com.esri.dbf.DBFType; 6 | import org.apache.hadoop.fs.FSDataInputStream; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.LongWritable; 9 | import org.apache.hadoop.io.MapWritable; 10 | import org.apache.hadoop.io.Text; 11 | import org.apache.hadoop.mapred.FileSplit; 12 | import org.apache.hadoop.mapred.InputSplit; 13 | import org.apache.hadoop.mapred.JobConf; 14 | import org.apache.hadoop.mapred.RecordReader; 15 | 16 | import java.io.IOException; 17 | import java.util.ArrayList; 18 | import java.util.List; 19 | 20 | /** 21 | */ 22 | public class DBFRecordReader 23 | implements RecordReader 24 | { 25 | protected final LongWritable m_key = new LongWritable(); 26 | protected final MapWritable m_value = new MapWritable(); 27 | protected float m_length; 28 | protected FSDataInputStream m_dbfStream; 29 | protected DBFReader m_dbfReader; 30 | protected List m_keys; 31 | protected long m_recno; 32 | 33 | public DBFRecordReader( 34 | final InputSplit inputSplit, 35 | final JobConf jobConf) throws IOException 36 | { 37 | if (inputSplit instanceof FileSplit) 38 | { 39 | final FileSplit fileSplit = (FileSplit) inputSplit; 40 | m_length = fileSplit.getLength(); 41 | final Path path = fileSplit.getPath(); 42 | m_dbfStream = path.getFileSystem(jobConf).open(path); 43 | m_dbfReader = new DBFReader(m_dbfStream); 44 | 45 | final List fields = m_dbfReader.getFields(); 46 | m_keys = new ArrayList(fields.size()); 47 | for (final DBFField field : fields) 48 | { 49 | m_keys.add(new Text(field.fieldName)); 50 | } 51 | } 52 | else 53 | { 54 | throw new IOException("Input split is not an instance of FileSplit"); 55 | } 56 | } 57 | 58 | @Override 59 | public LongWritable createKey() 60 | { 61 | return m_key; 62 | } 63 | 64 | @Override 65 | public MapWritable createValue() 66 | { 67 | return m_value; 68 | } 69 | 70 | @Override 71 | public long getPos() throws IOException 72 | { 73 | return m_dbfStream.getPos(); 74 | } 75 | 76 | @Override 77 | public float getProgress() throws IOException 78 | { 79 | return m_dbfStream.getPos() / m_length; 80 | } 81 | 82 | @Override 83 | public boolean next( 84 | final LongWritable key, 85 | final MapWritable value) throws IOException 86 | { 87 | final boolean hasNext = m_dbfReader.nextDataType() != DBFType.END; 88 | if (hasNext) 89 | { 90 | m_key.set(m_recno++); 91 | final int numFields = m_dbfReader.getNumberOfFields(); 92 | for (int i = 0; i < numFields; i++) 93 | { 94 | value.put(m_keys.get(i), m_dbfReader.readFieldWritable(i)); 95 | } 96 | } 97 | return hasNext; 98 | } 99 | 100 | @Override 101 | public void close() throws IOException 102 | { 103 | if (m_dbfStream != null) 104 | { 105 | m_dbfStream.close(); 106 | m_dbfStream = null; 107 | } 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/PointFeatureInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.io.PointFeatureWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.mapred.InputSplit; 6 | import 
org.apache.hadoop.mapred.JobConf; 7 | import org.apache.hadoop.mapred.RecordReader; 8 | import org.apache.hadoop.mapred.Reporter; 9 | 10 | import java.io.IOException; 11 | 12 | /** 13 | */ 14 | public class PointFeatureInputFormat 15 | extends AbstractInputFormat 16 | { 17 | private final class PointFeatureReader 18 | extends AbstractFeatureReader 19 | { 20 | private final PointFeatureWritable m_pointFeatureWritable = new PointFeatureWritable(); 21 | 22 | public PointFeatureReader( 23 | final InputSplit inputSplit, 24 | final JobConf jobConf) throws IOException 25 | { 26 | super(inputSplit, jobConf); 27 | } 28 | 29 | @Override 30 | public PointFeatureWritable createValue() 31 | { 32 | return m_pointFeatureWritable; 33 | } 34 | 35 | @Override 36 | protected void next() throws IOException 37 | { 38 | m_shpReader.queryPoint(m_pointFeatureWritable.point); 39 | putAttributes(m_pointFeatureWritable.attributes); 40 | } 41 | 42 | } 43 | 44 | @Override 45 | public RecordReader getRecordReader( 46 | final InputSplit inputSplit, 47 | final JobConf jobConf, 48 | final Reporter reporter) throws IOException 49 | { 50 | return new PointFeatureReader(inputSplit, jobConf); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/PointInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.io.PointWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.mapred.InputSplit; 6 | import org.apache.hadoop.mapred.JobConf; 7 | import org.apache.hadoop.mapred.RecordReader; 8 | import org.apache.hadoop.mapred.Reporter; 9 | 10 | import java.io.IOException; 11 | 12 | /** 13 | */ 14 | public class PointInputFormat 15 | extends AbstractInputFormat 16 | { 17 | private final class PointReader 18 | extends AbstractReader 19 | { 20 | private final PointWritable m_pointWritable = new PointWritable(); 21 | 22 | public PointReader( 23 | final InputSplit inputSplit, 24 | final JobConf jobConf) throws IOException 25 | { 26 | super(inputSplit, jobConf); 27 | } 28 | 29 | @Override 30 | public PointWritable createValue() 31 | { 32 | return m_pointWritable; 33 | } 34 | 35 | @Override 36 | protected void next() throws IOException 37 | { 38 | m_shpReader.queryPoint(m_pointWritable.point); 39 | } 40 | } 41 | 42 | @Override 43 | public RecordReader getRecordReader( 44 | final InputSplit inputSplit, 45 | final JobConf jobConf, 46 | final Reporter reporter) throws IOException 47 | { 48 | return new PointReader(inputSplit, jobConf); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/PolygonFeatureInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.io.PolygonFeatureWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.mapred.InputSplit; 6 | import org.apache.hadoop.mapred.JobConf; 7 | import org.apache.hadoop.mapred.RecordReader; 8 | import org.apache.hadoop.mapred.Reporter; 9 | 10 | import java.io.IOException; 11 | 12 | /** 13 | */ 14 | public class PolygonFeatureInputFormat 15 | extends AbstractInputFormat 16 | { 17 | private final class PolygonFeatureReader 18 | extends AbstractFeatureReader 19 | { 20 | private final PolygonFeatureWritable m_polygonFeatureWritable = new PolygonFeatureWritable(); 21 | 22 | public 
PolygonFeatureReader( 23 | final InputSplit inputSplit, 24 | final JobConf jobConf) throws IOException 25 | { 26 | super(inputSplit, jobConf); 27 | } 28 | 29 | @Override 30 | public PolygonFeatureWritable createValue() 31 | { 32 | return m_polygonFeatureWritable; 33 | } 34 | 35 | @Override 36 | protected void next() throws IOException 37 | { 38 | m_shpReader.queryPolygon(m_polygonFeatureWritable.polygon); 39 | putAttributes(m_polygonFeatureWritable.attributes); 40 | } 41 | } 42 | 43 | @Override 44 | public RecordReader getRecordReader( 45 | final InputSplit inputSplit, 46 | final JobConf jobConf, 47 | final Reporter reporter) throws IOException 48 | { 49 | return new PolygonFeatureReader(inputSplit, jobConf); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapred/PolygonInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.io.PolygonWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.mapred.InputSplit; 6 | import org.apache.hadoop.mapred.JobConf; 7 | import org.apache.hadoop.mapred.RecordReader; 8 | import org.apache.hadoop.mapred.Reporter; 9 | 10 | import java.io.IOException; 11 | 12 | /** 13 | */ 14 | public class PolygonInputFormat 15 | extends AbstractInputFormat 16 | { 17 | private final class PolygonReader 18 | extends AbstractReader 19 | { 20 | private final PolygonWritable m_polygonWritable = new PolygonWritable(); 21 | 22 | public PolygonReader( 23 | final InputSplit inputSplit, 24 | final JobConf jobConf) throws IOException 25 | { 26 | super(inputSplit, jobConf); 27 | } 28 | 29 | @Override 30 | public PolygonWritable createValue() 31 | { 32 | return m_polygonWritable; 33 | } 34 | 35 | @Override 36 | protected void next() throws IOException 37 | { 38 | m_shpReader.queryPolygon(m_polygonWritable.polygon); 39 | } 40 | } 41 | 42 | @Override 43 | public RecordReader getRecordReader( 44 | final InputSplit inputSplit, 45 | final JobConf jobConf, 46 | final Reporter reporter) throws IOException 47 | { 48 | return new PolygonReader(inputSplit, jobConf); 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/AbstractFeatureReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.dbf.DBFField; 4 | import com.esri.dbf.DBFReader; 5 | import org.apache.hadoop.fs.FSDataInputStream; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.io.MapWritable; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.Writable; 10 | import org.apache.hadoop.mapreduce.InputSplit; 11 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 12 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 13 | 14 | import java.io.IOException; 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | 18 | /** 19 | */ 20 | public abstract class AbstractFeatureReader extends AbstractReader 21 | { 22 | protected FSDataInputStream m_dbfStream; 23 | protected DBFReader m_dbfReader; 24 | protected List m_keys; 25 | 26 | @Override 27 | public void initialize( 28 | final InputSplit inputSplit, 29 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 30 | { 31 | super.initialize(inputSplit, taskAttemptContext); 32 | // No need to check if instance of FileSplit as it is done 
in super class. 33 | final FileSplit fileSplit = (FileSplit) inputSplit; 34 | // Get .shp file 35 | final Path shpPath = fileSplit.getPath(); 36 | final String dbfName = shpPath.getName().replace(".shp", ".dbf"); 37 | final Path dbfPath = new Path(shpPath.getParent(), dbfName); 38 | m_dbfStream = dbfPath.getFileSystem(taskAttemptContext.getConfiguration()).open(dbfPath); 39 | m_dbfReader = new DBFReader(m_dbfStream); 40 | // Create a list of field name as Hadoop Text instances 41 | final List fields = m_dbfReader.getFields(); 42 | m_keys = new ArrayList(fields.size()); 43 | for (final DBFField field : fields) 44 | { 45 | m_keys.add(new Text(field.fieldName)); 46 | } 47 | } 48 | 49 | protected void putAttributes(final MapWritable attributes) throws IOException 50 | { 51 | final int n = m_dbfReader.getNumberOfFields(); 52 | m_dbfReader.nextDataType(); 53 | for (int i = 0; i < n; i++) 54 | { 55 | attributes.put(m_keys.get(i), m_dbfReader.readFieldWritable(i)); 56 | } 57 | } 58 | 59 | @Override 60 | public void close() throws IOException 61 | { 62 | // Close shp 63 | super.close(); 64 | if (m_dbfStream != null) 65 | { 66 | m_dbfStream.close(); 67 | m_dbfStream = null; 68 | } 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/AbstractInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import org.apache.hadoop.fs.FileStatus; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.Writable; 7 | import org.apache.hadoop.mapreduce.InputSplit; 8 | import org.apache.hadoop.mapreduce.JobContext; 9 | import org.apache.hadoop.mapreduce.RecordReader; 10 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 | 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | 17 | /** 18 | */ 19 | abstract class AbstractInputFormat 20 | extends FileInputFormat 21 | { 22 | @Override 23 | protected List listStatus(final JobContext job) throws IOException 24 | { 25 | final List orig = super.listStatus(job); 26 | final List list = new ArrayList(); 27 | for (final FileStatus fileStatus : orig) 28 | { 29 | if (fileStatus.getPath() 30 | .getName() 31 | .toLowerCase() 32 | .endsWith(".shp")) 33 | { 34 | list.add(fileStatus); 35 | } 36 | } 37 | return list; 38 | } 39 | 40 | @Override 41 | protected boolean isSplitable( 42 | final JobContext context, 43 | final Path filename) 44 | { 45 | return false; 46 | } 47 | 48 | @Override 49 | public abstract RecordReader createRecordReader( 50 | final InputSplit inputSplit, 51 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException; 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/AbstractReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.shp.ShpReader; 4 | import org.apache.hadoop.fs.FSDataInputStream; 5 | import org.apache.hadoop.fs.FileSystem; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.io.LongWritable; 8 | import org.apache.hadoop.io.Writable; 9 | import org.apache.hadoop.mapreduce.InputSplit; 10 | import org.apache.hadoop.mapreduce.RecordReader; 11 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 12 | 
import org.apache.hadoop.mapreduce.lib.input.FileSplit; 13 | 14 | import java.io.IOException; 15 | 16 | /** 17 | */ 18 | abstract class AbstractReader 19 | extends RecordReader 20 | { 21 | protected final LongWritable m_recordNumber = new LongWritable(); 22 | protected long m_length; 23 | protected FSDataInputStream m_shpStream; 24 | protected ShpReader m_shpReader; 25 | 26 | @Override 27 | public void initialize( 28 | final InputSplit inputSplit, 29 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 30 | { 31 | if (inputSplit instanceof FileSplit) 32 | { 33 | final FileSplit fileSplit = (FileSplit) inputSplit; 34 | m_length = fileSplit.getLength(); 35 | final Path path = fileSplit.getPath(); 36 | final FileSystem fileSystem = path.getFileSystem(taskAttemptContext.getConfiguration()); 37 | m_shpStream = fileSystem.open(path); 38 | m_shpReader = new ShpReader(m_shpStream); 39 | } 40 | else 41 | { 42 | throw new IOException("Input split is not an instance of FileSplit"); 43 | } 44 | } 45 | 46 | @Override 47 | public LongWritable getCurrentKey() throws IOException, InterruptedException 48 | { 49 | return m_recordNumber; 50 | } 51 | 52 | @Override 53 | public boolean nextKeyValue() throws IOException, InterruptedException 54 | { 55 | final boolean hasMore = m_shpReader.hasMore(); 56 | if (hasMore) 57 | { 58 | next(); 59 | m_recordNumber.set(m_shpReader.recordNumber); 60 | } 61 | return hasMore; 62 | } 63 | 64 | protected abstract void next() throws IOException; 65 | 66 | @Override 67 | public abstract T getCurrentValue() throws IOException, InterruptedException; 68 | 69 | @Override 70 | public float getProgress() throws IOException, InterruptedException 71 | { 72 | return m_shpStream.getPos() / m_length; 73 | } 74 | 75 | @Override 76 | public void close() throws IOException 77 | { 78 | if (m_shpStream != null) 79 | { 80 | m_shpStream.close(); 81 | m_shpStream = null; 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/DBFInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import org.apache.hadoop.fs.FileStatus; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.MapWritable; 7 | import org.apache.hadoop.mapreduce.InputSplit; 8 | import org.apache.hadoop.mapreduce.JobContext; 9 | import org.apache.hadoop.mapreduce.RecordReader; 10 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 11 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 12 | 13 | import java.io.IOException; 14 | import java.util.List; 15 | 16 | /** 17 | */ 18 | public class DBFInputFormat 19 | extends FileInputFormat 20 | { 21 | @Override 22 | protected List listStatus(final JobContext job) throws IOException 23 | { 24 | final List list = super.listStatus(job); 25 | for (final FileStatus fileStatus : list) 26 | { 27 | if (!fileStatus.getPath().getName().toLowerCase().endsWith(".dbf")) 28 | { 29 | list.remove(fileStatus); 30 | } 31 | } 32 | return list; 33 | } 34 | 35 | @Override 36 | protected boolean isSplitable( 37 | final JobContext context, 38 | final Path filename) 39 | { 40 | return false; 41 | } 42 | 43 | @Override 44 | public RecordReader createRecordReader( 45 | final InputSplit inputSplit, 46 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 47 | { 48 | return new DBFRecordReader(inputSplit, 
taskAttemptContext); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/DBFRecordReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.dbf.DBFField; 4 | import com.esri.dbf.DBFReader; 5 | import com.esri.dbf.DBFType; 6 | import org.apache.hadoop.fs.FSDataInputStream; 7 | import org.apache.hadoop.fs.FileSystem; 8 | import org.apache.hadoop.fs.Path; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.io.MapWritable; 11 | import org.apache.hadoop.io.Text; 12 | import org.apache.hadoop.mapreduce.InputSplit; 13 | import org.apache.hadoop.mapreduce.RecordReader; 14 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 15 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 16 | 17 | import java.io.IOException; 18 | import java.util.ArrayList; 19 | import java.util.List; 20 | 21 | /** 22 | */ 23 | public class DBFRecordReader 24 | extends RecordReader 25 | { 26 | protected final LongWritable m_recordNumber = new LongWritable(); 27 | protected final MapWritable m_mapWritable = new MapWritable(); 28 | protected long m_length; 29 | protected FSDataInputStream m_dbfStream; 30 | protected DBFReader m_dbfReader; 31 | protected long m_recno; 32 | private ArrayList m_keys; 33 | 34 | public DBFRecordReader( 35 | final InputSplit inputSplit, 36 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 37 | { 38 | initialize(inputSplit, taskAttemptContext); 39 | } 40 | 41 | @Override 42 | public void initialize( 43 | final InputSplit inputSplit, 44 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 45 | { 46 | if (inputSplit instanceof FileSplit) 47 | { 48 | final FileSplit fileSplit = (FileSplit) inputSplit; 49 | m_length = fileSplit.getLength(); 50 | final Path path = fileSplit.getPath(); 51 | final FileSystem fileSystem = path.getFileSystem(taskAttemptContext.getConfiguration()); 52 | m_dbfStream = fileSystem.open(path); 53 | m_dbfReader = new DBFReader(m_dbfStream); 54 | 55 | final List fields = m_dbfReader.getFields(); 56 | m_keys = new ArrayList(fields.size()); 57 | for (final DBFField field : fields) 58 | { 59 | m_keys.add(new Text(field.fieldName)); 60 | } 61 | } 62 | else 63 | { 64 | throw new IOException("Input split is not an instance of FileSplit"); 65 | } 66 | } 67 | 68 | @Override 69 | public LongWritable getCurrentKey() throws IOException, InterruptedException 70 | { 71 | return m_recordNumber; 72 | } 73 | 74 | @Override 75 | public MapWritable getCurrentValue() throws IOException, InterruptedException 76 | { 77 | return m_mapWritable; 78 | } 79 | 80 | @Override 81 | public boolean nextKeyValue() throws IOException, InterruptedException 82 | { 83 | final boolean hasNext = m_dbfReader.nextDataType() != DBFType.END; 84 | if (hasNext) 85 | { 86 | m_recordNumber.set(m_recno++); 87 | final int numFields = m_dbfReader.getNumberOfFields(); 88 | for (int i = 0; i < numFields; i++) 89 | { 90 | m_mapWritable.put(m_keys.get(i), m_dbfReader.readFieldWritable(i)); 91 | } 92 | } 93 | return hasNext; 94 | } 95 | 96 | @Override 97 | public float getProgress() throws IOException, InterruptedException 98 | { 99 | return (float) m_dbfStream.getPos() / m_length; // cast to float to avoid truncating long division 100 | } 101 | 102 | @Override 103 | public void close() throws IOException 104 | { 105 | if (m_dbfStream != null) 106 | { 107 | m_dbfStream.close(); 108 | m_dbfStream = null; 109 | } 110 |
} 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/PointFeatureInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.io.PointFeatureWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.mapreduce.InputSplit; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | 9 | import java.io.IOException; 10 | 11 | /** 12 | */ 13 | public class PointFeatureInputFormat extends AbstractInputFormat 14 | { 15 | private final class PointFeatureReader extends AbstractFeatureReader 16 | { 17 | private final PointFeatureWritable m_pointFeatureWritable = new PointFeatureWritable(); 18 | 19 | public PointFeatureReader( 20 | final InputSplit inputSplit, 21 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 22 | { 23 | initialize(inputSplit, taskAttemptContext); 24 | } 25 | 26 | @Override 27 | protected void next() throws IOException 28 | { 29 | m_shpReader.queryPoint(m_pointFeatureWritable.point); 30 | putAttributes(m_pointFeatureWritable.attributes); 31 | } 32 | 33 | @Override 34 | public PointFeatureWritable getCurrentValue() throws IOException, InterruptedException 35 | { 36 | return m_pointFeatureWritable; 37 | } 38 | 39 | } 40 | 41 | @Override 42 | public RecordReader createRecordReader( 43 | final InputSplit inputSplit, 44 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 45 | { 46 | return new PointFeatureReader(inputSplit, taskAttemptContext); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/PointInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.io.PointWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.mapreduce.InputSplit; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | 9 | import java.io.IOException; 10 | 11 | /** 12 | */ 13 | public class PointInputFormat extends AbstractInputFormat 14 | { 15 | private final class PointReader extends AbstractReader 16 | { 17 | private final PointWritable m_pointWritable = new PointWritable(); 18 | 19 | public PointReader( 20 | final InputSplit inputSplit, 21 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 22 | { 23 | initialize(inputSplit, taskAttemptContext); 24 | } 25 | 26 | @Override 27 | protected void next() throws IOException 28 | { 29 | m_shpReader.queryPoint(m_pointWritable.point); 30 | } 31 | 32 | @Override 33 | public PointWritable getCurrentValue() throws IOException, InterruptedException 34 | { 35 | return m_pointWritable; 36 | } 37 | 38 | } 39 | 40 | @Override 41 | public RecordReader createRecordReader( 42 | final InputSplit inputSplit, 43 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 44 | { 45 | return new PointReader(inputSplit, taskAttemptContext); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/PolygonFeatureInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | 
import com.esri.io.PolygonFeatureWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.mapreduce.InputSplit; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | 9 | import java.io.IOException; 10 | 11 | /** 12 | */ 13 | public class PolygonFeatureInputFormat extends AbstractInputFormat 14 | { 15 | private final class PolygonFeatureReader extends AbstractFeatureReader 16 | { 17 | private final PolygonFeatureWritable m_polygonFeatureWritable = new PolygonFeatureWritable(); 18 | 19 | public PolygonFeatureReader( 20 | final InputSplit inputSplit, 21 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 22 | { 23 | initialize(inputSplit, taskAttemptContext); 24 | } 25 | 26 | @Override 27 | protected void next() throws IOException 28 | { 29 | m_shpReader.queryPolygon(m_polygonFeatureWritable.polygon); 30 | putAttributes(m_polygonFeatureWritable.attributes); 31 | } 32 | 33 | @Override 34 | public PolygonFeatureWritable getCurrentValue() throws IOException, InterruptedException 35 | { 36 | return m_polygonFeatureWritable; 37 | } 38 | 39 | } 40 | 41 | @Override 42 | public RecordReader createRecordReader( 43 | final InputSplit inputSplit, 44 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 45 | { 46 | return new PolygonFeatureReader(inputSplit, taskAttemptContext); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/esri/mapreduce/PolygonInputFormat.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.io.PolygonWritable; 4 | import org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.mapreduce.InputSplit; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | 9 | import java.io.IOException; 10 | 11 | /** 12 | */ 13 | public class PolygonInputFormat extends AbstractInputFormat 14 | { 15 | private final class PolygonReader extends AbstractReader 16 | { 17 | private final PolygonWritable m_polygonWritable = new PolygonWritable(); 18 | 19 | public PolygonReader( 20 | final InputSplit inputSplit, 21 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 22 | { 23 | initialize(inputSplit, taskAttemptContext); 24 | } 25 | 26 | @Override 27 | protected void next() throws IOException 28 | { 29 | m_shpReader.queryPolygon(m_polygonWritable.polygon); 30 | } 31 | 32 | @Override 33 | public PolygonWritable getCurrentValue() throws IOException, InterruptedException 34 | { 35 | return m_polygonWritable; 36 | } 37 | 38 | } 39 | 40 | @Override 41 | public RecordReader createRecordReader( 42 | final InputSplit inputSplit, 43 | final TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException 44 | { 45 | return new PolygonReader(inputSplit, taskAttemptContext); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/com/esri/shp/ShpHeader.java: -------------------------------------------------------------------------------- 1 | package com.esri.shp; 2 | 3 | import org.apache.commons.io.EndianUtils; 4 | 5 | import java.io.DataInputStream; 6 | import java.io.IOException; 7 | import java.io.Serializable; 8 | 9 | /** 10 | */ 11 | public class ShpHeader implements Serializable 12 | { 13 | public transient int 
fileLength; 14 | public transient int version; 15 | public transient int shapeType; 16 | public transient double xmin; 17 | public transient double ymin; 18 | public transient double xmax; 19 | public transient double ymax; 20 | public transient double zmin; 21 | public transient double zmax; 22 | public transient double mmin; 23 | public transient double mmax; 24 | 25 | public ShpHeader(final DataInputStream dataInputStream) throws IOException 26 | { 27 | final int signature = dataInputStream.readInt(); 28 | if (signature != 9994) 29 | { 30 | throw new IOException("Not a valid shapefile. Expected 9994 as file header !"); 31 | } 32 | 33 | dataInputStream.skip(5 * 4); 34 | 35 | fileLength = dataInputStream.readInt(); 36 | 37 | version = EndianUtils.readSwappedInteger(dataInputStream); 38 | shapeType = EndianUtils.readSwappedInteger(dataInputStream); 39 | 40 | xmin = EndianUtils.readSwappedDouble(dataInputStream); 41 | ymin = EndianUtils.readSwappedDouble(dataInputStream); 42 | xmax = EndianUtils.readSwappedDouble(dataInputStream); 43 | ymax = EndianUtils.readSwappedDouble(dataInputStream); 44 | zmin = EndianUtils.readSwappedDouble(dataInputStream); 45 | zmax = EndianUtils.readSwappedDouble(dataInputStream); 46 | mmin = EndianUtils.readSwappedDouble(dataInputStream); 47 | mmax = EndianUtils.readSwappedDouble(dataInputStream); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/com/esri/shp/ShpReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.shp; 2 | 3 | import com.esri.core.geometry.Point; 4 | import com.esri.core.geometry.Polygon; 5 | import com.esri.io.PolylineMWritable; 6 | import org.apache.commons.io.EndianUtils; 7 | 8 | import java.io.DataInputStream; 9 | import java.io.IOException; 10 | import java.io.Serializable; 11 | 12 | /** 13 | * http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf 14 | */ 15 | public class ShpReader implements Serializable 16 | { 17 | private transient DataInputStream m_dataInputStream; 18 | private transient ShpHeader m_shpHeader; 19 | 20 | private transient int m_parts[] = new int[4]; 21 | 22 | public transient int recordNumber; 23 | public transient int contentLength; 24 | public transient int contentLengthInBytes; 25 | public transient int shapeType; 26 | public transient double xmin; 27 | public transient double ymin; 28 | public transient double xmax; 29 | public transient double ymax; 30 | public transient double mmin; 31 | public transient double mmax; 32 | public transient int numParts; 33 | public transient int numPoints; 34 | 35 | public ShpReader(final DataInputStream dataInputStream) throws IOException 36 | { 37 | m_dataInputStream = dataInputStream; 38 | m_shpHeader = new ShpHeader(dataInputStream); 39 | } 40 | 41 | public ShpHeader getHeader() 42 | { 43 | return m_shpHeader; 44 | } 45 | 46 | public boolean hasMore() throws IOException 47 | { 48 | return m_dataInputStream.available() > 0; 49 | } 50 | 51 | private void readRecordHeader() throws IOException 52 | { 53 | recordNumber = m_dataInputStream.readInt(); 54 | contentLength = m_dataInputStream.readInt(); 55 | contentLengthInBytes = contentLength + contentLength - 4; 56 | 57 | shapeType = EndianUtils.readSwappedInteger(m_dataInputStream); 58 | } 59 | 60 | public Point readPoint() throws IOException 61 | { 62 | return queryPoint(new Point()); 63 | } 64 | 65 | public Polygon readPolygon() throws IOException 66 | { 67 | return queryPolygon(new Polygon()); 68 
| } 69 | 70 | public Point queryPoint(final Point point) throws IOException 71 | { 72 | readRecordHeader(); 73 | point.setX(EndianUtils.readSwappedDouble(m_dataInputStream)); 74 | point.setY(EndianUtils.readSwappedDouble(m_dataInputStream)); 75 | return point; 76 | } 77 | 78 | public Polygon queryPolygon(final Polygon polygon) throws IOException 79 | { 80 | polygon.setEmpty(); 81 | 82 | readRecordHeader(); 83 | 84 | readShapeHeader(); 85 | 86 | for (int i = 0, j = 1; i < numParts; ) 87 | { 88 | final int count = m_parts[j++] - m_parts[i++]; 89 | for (int c = 0; c < count; c++) 90 | { 91 | final double x = EndianUtils.readSwappedDouble(m_dataInputStream); 92 | final double y = EndianUtils.readSwappedDouble(m_dataInputStream); 93 | if (c > 0) 94 | { 95 | polygon.lineTo(x, y); 96 | } 97 | else 98 | { 99 | polygon.startPath(x, y); 100 | } 101 | } 102 | } 103 | 104 | polygon.closeAllPaths(); 105 | 106 | return polygon; 107 | } 108 | 109 | public PolylineMWritable readPolylineMWritable() throws IOException 110 | { 111 | final PolylineMWritable polylineMWritable = new PolylineMWritable(); 112 | 113 | readRecordHeader(); 114 | readShapeHeader(); 115 | 116 | polylineMWritable.lens = new int[numParts]; 117 | polylineMWritable.x = new double[numPoints]; 118 | polylineMWritable.y = new double[numPoints]; 119 | polylineMWritable.m = new double[numPoints]; 120 | 121 | int p = 0; 122 | for (int i = 0, j = 1; i < numParts; i++, j++) 123 | { 124 | final int count = m_parts[j] - m_parts[i]; 125 | polylineMWritable.lens[i] = count; 126 | for (int c = 0; c < count; c++, p++) 127 | { 128 | polylineMWritable.x[p] = EndianUtils.readSwappedDouble(m_dataInputStream); 129 | polylineMWritable.y[p] = EndianUtils.readSwappedDouble(m_dataInputStream); 130 | } 131 | } 132 | 133 | mmin = EndianUtils.readSwappedDouble(m_dataInputStream); 134 | mmax = EndianUtils.readSwappedDouble(m_dataInputStream); 135 | 136 | for (p = 0; p < numPoints; p++) 137 | { 138 | polylineMWritable.m[p] = EndianUtils.readSwappedDouble(m_dataInputStream); 139 | } 140 | 141 | return polylineMWritable; 142 | } 143 | 144 | private void readShapeHeader() throws IOException 145 | { 146 | xmin = EndianUtils.readSwappedDouble(m_dataInputStream); 147 | ymin = EndianUtils.readSwappedDouble(m_dataInputStream); 148 | xmax = EndianUtils.readSwappedDouble(m_dataInputStream); 149 | ymax = EndianUtils.readSwappedDouble(m_dataInputStream); 150 | 151 | numParts = EndianUtils.readSwappedInteger(m_dataInputStream); 152 | numPoints = EndianUtils.readSwappedInteger(m_dataInputStream); 153 | 154 | if ((numParts + 1) > m_parts.length) 155 | { 156 | m_parts = new int[numParts + 1]; 157 | } 158 | for (int p = 0; p < numParts; p++) 159 | { 160 | m_parts[p] = EndianUtils.readSwappedInteger(m_dataInputStream); 161 | } 162 | m_parts[numParts] = numPoints; 163 | } 164 | 165 | } 166 | -------------------------------------------------------------------------------- /src/main/java/com/esri/shp/ShxReader.java: -------------------------------------------------------------------------------- 1 | package com.esri.shp; 2 | 3 | import java.io.DataInputStream; 4 | import java.io.IOException; 5 | import java.io.Serializable; 6 | 7 | /** 8 | */ 9 | public class ShxReader implements Serializable 10 | { 11 | private transient DataInputStream dataInputStream; 12 | 13 | public transient ShpHeader shpHeader; 14 | public transient long recordOffset; 15 | public transient int recordLength; 16 | 17 | public ShxReader(final DataInputStream dataInputStream) throws IOException 18 | { 19 | 
this.dataInputStream = dataInputStream; 20 | shpHeader = new ShpHeader(dataInputStream); 21 | } 22 | 23 | public boolean hasMore() throws IOException 24 | { 25 | return dataInputStream.available() > 0; 26 | } 27 | 28 | /** 29 | * Read an SHX record. 30 | * 31 | * @return the seek position into the SHP file. 32 | * @throws IOException if an IO error occurs. 33 | */ 34 | public long readRecord() throws IOException 35 | { 36 | recordOffset = dataInputStream.readInt(); 37 | recordLength = dataInputStream.readInt(); 38 | return recordOffset << 1; 39 | } 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/com/esri/dbf/DBFTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.dbf; 2 | 3 | import org.apache.hadoop.io.DoubleWritable; 4 | import org.apache.hadoop.io.FloatWritable; 5 | import org.apache.hadoop.io.IntWritable; 6 | import org.apache.hadoop.io.LongWritable; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.io.Writable; 9 | import org.junit.Test; 10 | 11 | import java.io.DataInputStream; 12 | import java.io.IOException; 13 | import java.io.InputStream; 14 | import java.util.GregorianCalendar; 15 | import java.util.List; 16 | 17 | import static org.junit.Assert.assertEquals; 18 | import static org.junit.Assert.assertNotEquals; 19 | import static org.junit.Assert.assertNotNull; 20 | import static org.junit.Assert.assertTrue; 21 | 22 | /** 23 | */ 24 | public class DBFTest 25 | { 26 | @Test 27 | public void testDBFWritable() throws IOException 28 | { 29 | final InputStream inputStream = this.getClass().getResourceAsStream("/testpoint.dbf"); 30 | assertNotNull(inputStream); 31 | try 32 | { 33 | final DBFReader dbfReader = new DBFReader(new DataInputStream(inputStream)); 34 | final List fields = dbfReader.getFields(); 35 | assertEquals(6, fields.size()); 36 | assertField(fields.get(0), "AShort", 'N', 4, 0); 37 | assertField(fields.get(1), "ALong", 'N', 9, 0); 38 | assertField(fields.get(2), "AFloat", 'F', 13, 11); 39 | assertField(fields.get(3), "ANume106", 'N', 11, 6); 40 | assertField(fields.get(4), "AText50", 'C', 50, 0); 41 | assertField(fields.get(5), "ADate", 'D', 8, 0); 42 | 43 | assertEquals(1, dbfReader.getNumberOfRecords()); 44 | 45 | assertNotEquals(DBFType.END, dbfReader.nextDataType()); 46 | 47 | Writable writable = dbfReader.readFieldWritable(0); 48 | assertTrue(writable instanceof IntWritable); 49 | assertEquals(123, ((IntWritable) writable).get()); 50 | 51 | writable = dbfReader.readFieldWritable(1); 52 | assertTrue(writable instanceof LongWritable); 53 | assertEquals(12345, ((LongWritable) writable).get()); 54 | 55 | writable = dbfReader.readFieldWritable(2); 56 | assertTrue(writable instanceof FloatWritable); 57 | assertEquals(123.4, ((FloatWritable) writable).get(), 0.001); 58 | 59 | writable = dbfReader.readFieldWritable(3); 60 | assertTrue(writable instanceof DoubleWritable); 61 | assertEquals(123.45678, ((DoubleWritable) writable).get(), 0.000001); 62 | 63 | writable = dbfReader.readFieldWritable(4); 64 | assertTrue(writable instanceof Text); 65 | assertEquals("Hello, World", writable.toString().trim()); 66 | 67 | writable = dbfReader.readFieldWritable(5); 68 | assertTrue(writable instanceof LongWritable); 69 | final GregorianCalendar gregorianCalendar = new GregorianCalendar(2014, 5 - 1, 20); 70 | assertEquals(gregorianCalendar.getTimeInMillis(), ((LongWritable) writable).get()); 71 | } 72 | finally 73 | { 74 | inputStream.close(); 75 | 
} 76 | } 77 | 78 | @Test 79 | public void testDBFValues() throws IOException 80 | { 81 | final InputStream inputStream = this.getClass().getResourceAsStream("/testpoint.dbf"); 82 | assertNotNull(inputStream); 83 | try 84 | { 85 | final DataInputStream dataInputStream = new DataInputStream(inputStream); 86 | final DBFReader dbfReader = new DBFReader(dataInputStream); 87 | 88 | assertNotEquals(DBFType.END, dbfReader.nextDataType()); 89 | 90 | Object value = dbfReader.readFieldValue(0); 91 | assertTrue(value instanceof Short); 92 | assertEquals((short) 123, value); 93 | 94 | value = dbfReader.readFieldValue(1); 95 | assertTrue(value instanceof Long); 96 | assertEquals(12345L, value); 97 | 98 | value = dbfReader.readFieldValue(2); 99 | assertTrue(value instanceof Float); 100 | assertEquals(123.4F, (Float) value, 0.001); 101 | 102 | value = dbfReader.readFieldValue(3); 103 | assertTrue(value instanceof Double); 104 | assertEquals(123.45678, (Double) value, 0.000001); 105 | 106 | value = dbfReader.readFieldValue(4); 107 | assertTrue(value instanceof String); 108 | assertEquals("Hello, World", value.toString().trim()); 109 | 110 | value = dbfReader.readFieldValue(5); 111 | assertTrue(value instanceof Long); 112 | final GregorianCalendar gregorianCalendar = new GregorianCalendar(2014, 5 - 1, 20); 113 | assertEquals(gregorianCalendar.getTimeInMillis(), value); 114 | 115 | assertEquals(DBFType.END, dbfReader.nextDataType()); 116 | } 117 | finally 118 | { 119 | inputStream.close(); 120 | } 121 | } 122 | 123 | private void assertField( 124 | final DBFField dbfField, 125 | final String fieldName, 126 | final char dataType, 127 | final int fieldLength, 128 | final int decimalCount) 129 | { 130 | assertEquals(fieldName, dbfField.fieldName); 131 | assertEquals(dataType, dbfField.dataType); 132 | assertEquals(fieldLength, dbfField.fieldLength); 133 | assertEquals(decimalCount, dbfField.decimalCount); 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/test/java/com/esri/io/PointFeatureWritableTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import org.apache.hadoop.io.LongWritable; 4 | import org.apache.hadoop.io.Text; 5 | import org.junit.Test; 6 | 7 | import java.io.ByteArrayInputStream; 8 | import java.io.ByteArrayOutputStream; 9 | import java.io.DataInputStream; 10 | import java.io.DataOutputStream; 11 | import java.io.IOException; 12 | 13 | import static org.junit.Assert.assertEquals; 14 | 15 | /** 16 | */ 17 | public class PointFeatureWritableTest 18 | { 19 | @Test 20 | public void testWriteRead() throws Exception 21 | { 22 | final ByteArrayOutputStream byteArrayOutputStream = getByteArrayOutputStream(); 23 | 24 | final ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArrayOutputStream.toByteArray()); 25 | final PointFeatureWritable pointFeatureWritable = new PointFeatureWritable(); 26 | pointFeatureWritable.readFields(new DataInputStream(byteArrayInputStream)); 27 | 28 | assertEquals(10.0, pointFeatureWritable.point.getX(), 0.000001); 29 | assertEquals(11.0, pointFeatureWritable.point.getY(), 0.000001); 30 | assertEquals(new LongWritable(1234), pointFeatureWritable.attributes.get(new Text("key"))); 31 | } 32 | 33 | private ByteArrayOutputStream getByteArrayOutputStream() throws IOException 34 | { 35 | final PointFeatureWritable pointFeatureWritable = new PointFeatureWritable(); 36 | pointFeatureWritable.point.setXY(10, 11); 37 | 
pointFeatureWritable.attributes.put(new Text("key"), new LongWritable(1234)); 38 | 39 | final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); 40 | pointFeatureWritable.write(new DataOutputStream(byteArrayOutputStream)); 41 | byteArrayOutputStream.flush(); 42 | return byteArrayOutputStream; 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/com/esri/io/PointWritableTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import com.esri.core.geometry.Point; 4 | import org.junit.Test; 5 | 6 | import java.io.ByteArrayInputStream; 7 | import java.io.ByteArrayOutputStream; 8 | import java.io.DataInput; 9 | import java.io.DataInputStream; 10 | import java.io.DataOutput; 11 | import java.io.DataOutputStream; 12 | import java.io.IOException; 13 | 14 | import static org.junit.Assert.assertEquals; 15 | 16 | /** 17 | */ 18 | public class PointWritableTest 19 | { 20 | @Test 21 | public void testWriteRead() throws IOException 22 | { 23 | final ByteArrayOutputStream byteArrayOutputStream = getByteArrayOutputStream(); 24 | 25 | final DataInput dataInput = new DataInputStream(new ByteArrayInputStream(byteArrayOutputStream.toByteArray())); 26 | 27 | final PointWritable pointWritable = new PointWritable(); 28 | pointWritable.readFields(dataInput); 29 | 30 | final Point point = pointWritable.point; 31 | 32 | assertEquals(123, point.getX(), 0.000001); 33 | assertEquals(345, point.getY(), 0.000001); 34 | } 35 | 36 | private ByteArrayOutputStream getByteArrayOutputStream() throws IOException 37 | { 38 | final PointWritable pointWritable = new PointWritable(); 39 | pointWritable.point.setX(123); 40 | pointWritable.point.setY(345); 41 | 42 | final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); 43 | final DataOutput dataOutput = new DataOutputStream(byteArrayOutputStream); 44 | pointWritable.write(dataOutput); 45 | byteArrayOutputStream.flush(); 46 | return byteArrayOutputStream; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/test/java/com/esri/io/PolygonFeatureWritableTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import com.esri.core.geometry.Point2D; 4 | import com.esri.core.geometry.Polygon; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.io.Text; 7 | import org.junit.Test; 8 | 9 | import java.io.ByteArrayInputStream; 10 | import java.io.ByteArrayOutputStream; 11 | import java.io.DataInputStream; 12 | import java.io.DataOutputStream; 13 | import java.io.IOException; 14 | 15 | import static org.junit.Assert.assertEquals; 16 | 17 | /** 18 | */ 19 | public class PolygonFeatureWritableTest 20 | { 21 | @Test 22 | public void testWriteRead() throws Exception 23 | { 24 | final ByteArrayOutputStream byteArrayOutputStream = getByteArrayOutputStream(); 25 | 26 | final ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(byteArrayOutputStream.toByteArray()); 27 | final PolygonFeatureWritable polygonFeatureWritable = new PolygonFeatureWritable(); 28 | polygonFeatureWritable.readFields(new DataInputStream(byteArrayInputStream)); 29 | 30 | final Polygon polygon = polygonFeatureWritable.polygon; 31 | final Point2D[] coordinates2D = polygon.getCoordinates2D(); 32 | assertEquals(4, coordinates2D.length); 33 | 34 | assertEquals(0, coordinates2D[0].x, 0.000001); 35 
| assertEquals(0, coordinates2D[0].y, 0.000001); 36 | 37 | assertEquals(10, coordinates2D[1].x, 0.000001); 38 | assertEquals(0, coordinates2D[1].y, 0.000001); 39 | 40 | assertEquals(10, coordinates2D[2].x, 0.000001); 41 | assertEquals(10, coordinates2D[2].y, 0.000001); 42 | 43 | assertEquals(0, coordinates2D[3].x, 0.000001); 44 | assertEquals(0, coordinates2D[3].y, 0.000001); 45 | 46 | assertEquals(new LongWritable(1234), polygonFeatureWritable.attributes.get(new Text("key"))); 47 | } 48 | 49 | private ByteArrayOutputStream getByteArrayOutputStream() throws IOException 50 | { 51 | final PolygonFeatureWritable pointFeatureWritable = new PolygonFeatureWritable(); 52 | final Polygon polygon = pointFeatureWritable.polygon; 53 | polygon.startPath(0, 0); 54 | polygon.lineTo(10, 0); 55 | polygon.lineTo(10, 10); 56 | polygon.lineTo(0, 0); 57 | polygon.closeAllPaths(); 58 | 59 | pointFeatureWritable.attributes.put(new Text("key"), new LongWritable(1234)); 60 | 61 | final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); 62 | pointFeatureWritable.write(new DataOutputStream(byteArrayOutputStream)); 63 | byteArrayOutputStream.flush(); 64 | return byteArrayOutputStream; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/test/java/com/esri/io/PolygonWritableTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import com.esri.core.geometry.Point2D; 4 | import com.esri.core.geometry.Polygon; 5 | import org.junit.Test; 6 | 7 | import java.io.ByteArrayInputStream; 8 | import java.io.ByteArrayOutputStream; 9 | import java.io.DataInput; 10 | import java.io.DataInputStream; 11 | import java.io.DataOutput; 12 | import java.io.DataOutputStream; 13 | import java.io.IOException; 14 | 15 | import static org.junit.Assert.assertEquals; 16 | 17 | /** 18 | */ 19 | public class PolygonWritableTest 20 | { 21 | @Test 22 | public void testWriteRead() throws IOException 23 | { 24 | final ByteArrayOutputStream byteArrayOutputStream = getByteArrayOutputStream(); 25 | 26 | final DataInput dataInput = new DataInputStream(new ByteArrayInputStream(byteArrayOutputStream.toByteArray())); 27 | final PolygonWritable polygonWritable = new PolygonWritable(); 28 | polygonWritable.readFields(dataInput); 29 | 30 | final Polygon polygon = polygonWritable.polygon; 31 | final Point2D[] coordinates2D = polygon.getCoordinates2D(); 32 | 33 | assertEquals(9, coordinates2D.length); 34 | 35 | assertEquals(0.0, coordinates2D[0].x, 0.000001); 36 | assertEquals(0.0, coordinates2D[0].y, 0.000001); 37 | 38 | assertEquals(10.0, coordinates2D[1].x, 0.000001); 39 | assertEquals(0.0, coordinates2D[1].y, 0.000001); 40 | 41 | assertEquals(10.0, coordinates2D[2].x, 0.000001); 42 | assertEquals(10.0, coordinates2D[2].y, 0.000001); 43 | 44 | assertEquals(0.0, coordinates2D[3].x, 0.000001); 45 | assertEquals(10.0, coordinates2D[3].y, 0.000001); 46 | 47 | assertEquals(0.0, coordinates2D[4].x, 0.000001); 48 | assertEquals(0.0, coordinates2D[4].y, 0.000001); 49 | 50 | assertEquals(20.0, coordinates2D[5].x, 0.000001); 51 | assertEquals(20.0, coordinates2D[5].y, 0.000001); 52 | 53 | assertEquals(30.0, coordinates2D[6].x, 0.000001); 54 | assertEquals(20.0, coordinates2D[6].y, 0.000001); 55 | 56 | assertEquals(30.0, coordinates2D[7].x, 0.000001); 57 | assertEquals(30.0, coordinates2D[7].y, 0.000001); 58 | 59 | assertEquals(20.0, coordinates2D[8].x, 0.000001); 60 | assertEquals(20.0, coordinates2D[8].y, 0.000001); 61 | 62 
| } 63 | 64 | private ByteArrayOutputStream getByteArrayOutputStream() throws IOException 65 | { 66 | final PolygonWritable polygonWritable = new PolygonWritable(); 67 | final Polygon polygon = polygonWritable.polygon; 68 | 69 | polygon.startPath(0, 0); 70 | polygon.lineTo(10, 0); 71 | polygon.lineTo(10, 10); 72 | polygon.lineTo(0, 10); 73 | polygon.lineTo(0, 0); 74 | 75 | polygon.startPath(20, 20); 76 | polygon.lineTo(30, 20); 77 | polygon.lineTo(30, 30); 78 | polygon.lineTo(20, 20); 79 | 80 | polygon.closeAllPaths(); 81 | 82 | final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); 83 | final DataOutput dataOutput = new DataOutputStream(byteArrayOutputStream); 84 | polygonWritable.write(dataOutput); 85 | 86 | byteArrayOutputStream.flush(); 87 | return byteArrayOutputStream; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/test/java/com/esri/io/PolylineMWritableTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.io; 2 | 3 | import org.junit.Test; 4 | 5 | import java.io.ByteArrayInputStream; 6 | import java.io.ByteArrayOutputStream; 7 | import java.io.DataInput; 8 | import java.io.DataInputStream; 9 | import java.io.DataOutput; 10 | import java.io.DataOutputStream; 11 | import java.io.IOException; 12 | 13 | import static org.junit.Assert.assertEquals; 14 | import static org.junit.Assert.assertNotNull; 15 | 16 | /** 17 | */ 18 | public class PolylineMWritableTest 19 | { 20 | @Test 21 | public void testWriteRead() throws IOException 22 | { 23 | final ByteArrayOutputStream byteArrayOutputStream = getByteArrayOutputStream(); 24 | 25 | final DataInput dataInput = new DataInputStream(new ByteArrayInputStream(byteArrayOutputStream.toByteArray())); 26 | final PolylineMWritable polylineM = new PolylineMWritable(); 27 | polylineM.readFields(dataInput); 28 | 29 | assertNotNull(polylineM.lens); 30 | assertEquals(1, polylineM.lens.length); 31 | assertEquals(2, polylineM.lens[0]); 32 | 33 | assertNotNull(polylineM.x); 34 | assertNotNull(polylineM.y); 35 | assertNotNull(polylineM.m); 36 | 37 | assertEquals(2, polylineM.x.length); 38 | assertEquals(2, polylineM.y.length); 39 | assertEquals(2, polylineM.m.length); 40 | 41 | assertEquals(0.0, polylineM.x[0], 0.000001); 42 | assertEquals(100.0, polylineM.x[1], 0.000001); 43 | 44 | assertEquals(0.0, polylineM.y[0], 0.000001); 45 | assertEquals(200.0, polylineM.y[1], 0.000001); 46 | 47 | assertEquals(10.0, polylineM.m[0], 0.000001); 48 | assertEquals(20.0, polylineM.m[1], 0.000001); 49 | 50 | } 51 | 52 | private ByteArrayOutputStream getByteArrayOutputStream() throws IOException 53 | { 54 | final PolylineMWritable polylineM = new PolylineMWritable(); 55 | 56 | polylineM.lens = new int[]{2}; 57 | polylineM.x = new double[]{0, 100}; 58 | polylineM.y = new double[]{0, 200}; 59 | polylineM.m = new double[]{10, 20}; 60 | 61 | final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); 62 | final DataOutput dataOutput = new DataOutputStream(byteArrayOutputStream); 63 | polylineM.write(dataOutput); 64 | 65 | byteArrayOutputStream.flush(); 66 | return byteArrayOutputStream; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapred/DBFReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import org.apache.hadoop.fs.Path; 4 | import 
org.apache.hadoop.io.LongWritable; 5 | import org.apache.hadoop.io.MapWritable; 6 | import org.apache.hadoop.mapred.FileSplit; 7 | import org.junit.Test; 8 | 9 | import java.io.IOException; 10 | import java.net.URISyntaxException; 11 | 12 | import static org.junit.Assert.assertEquals; 13 | import static org.junit.Assert.assertTrue; 14 | 15 | /** 16 | */ 17 | public class DBFReaderTest extends MapredFS 18 | { 19 | @Test 20 | public void testDBFReader() throws URISyntaxException, IOException 21 | { 22 | final Path dbf = getPath("/testpoint.dbf"); 23 | final FileSplit fileSplit = getFileSplit(dbf); 24 | final DBFRecordReader dbfRecordReader = new DBFRecordReader(fileSplit, m_jobConfig); 25 | final LongWritable key = dbfRecordReader.createKey(); 26 | final MapWritable value = dbfRecordReader.createValue(); 27 | assertTrue(dbfRecordReader.next(key, value)); 28 | assertEquals(0L, key.get()); 29 | assertAttributeKeys(value); 30 | dbfRecordReader.close(); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapred/MapredFS.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.test.MiniFS; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.mapred.FileSplit; 6 | 7 | import java.io.IOException; 8 | 9 | /** 10 | */ 11 | public class MapredFS extends MiniFS 12 | { 13 | protected FileSplit getFileSplit(final Path path) throws IOException 14 | { 15 | final long len = m_fileSystem.getFileStatus(path).getLen(); 16 | return new FileSplit(path, 0, len, (String[]) null); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapred/PointFeatureReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.io.PointFeatureWritable; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.mapred.FileSplit; 7 | import org.apache.hadoop.mapred.RecordReader; 8 | import org.junit.Test; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | 13 | import static org.junit.Assert.assertFalse; 14 | import static org.junit.Assert.assertTrue; 15 | 16 | /** 17 | */ 18 | public class PointFeatureReaderTest extends MapredFS 19 | { 20 | @Test 21 | public void testPointFeatureReader() throws IOException, URISyntaxException 22 | { 23 | final Path dbf = getPath("/testpoint.dbf"); 24 | 25 | final Path shp = getPath("/testpoint.shp"); 26 | 27 | final FileSplit fileSplit = getFileSplit(shp); 28 | 29 | final PointFeatureInputFormat inputFormat = new PointFeatureInputFormat(); 30 | final RecordReader recordReader = inputFormat.getRecordReader(fileSplit, m_jobConfig, null); 31 | final LongWritable key = recordReader.createKey(); 32 | final PointFeatureWritable value = recordReader.createValue(); 33 | assertTrue(recordReader.next(key, value)); 34 | assertPointValues(value); 35 | assertAttributeKeys(value.attributes); 36 | assertFalse(recordReader.next(key, value)); 37 | recordReader.close(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapred/PointReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.io.PointWritable; 4 | import 
org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.mapred.FileSplit; 7 | import org.apache.hadoop.mapred.RecordReader; 8 | import org.junit.Test; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | 13 | import static org.junit.Assert.assertFalse; 14 | import static org.junit.Assert.assertTrue; 15 | 16 | /** 17 | */ 18 | public class PointReaderTest extends MapredFS 19 | { 20 | @Test 21 | public void testPointReader() throws IOException, URISyntaxException 22 | { 23 | final Path shp = getPath("/testpoint.shp"); 24 | final FileSplit fileSplit = getFileSplit(shp); 25 | final PointInputFormat inputFormat = new PointInputFormat(); 26 | final RecordReader recordReader = inputFormat.getRecordReader(fileSplit, m_jobConfig, null); 27 | final LongWritable key = recordReader.createKey(); 28 | final PointWritable value = recordReader.createValue(); 29 | assertTrue(recordReader.next(key, value)); 30 | assertPointValues(value); 31 | assertFalse(recordReader.next(key, value)); 32 | recordReader.close(); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapred/PolygonFeatureReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.io.PolygonFeatureWritable; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.mapred.FileSplit; 7 | import org.apache.hadoop.mapred.RecordReader; 8 | import org.junit.Test; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | 13 | import static org.junit.Assert.assertFalse; 14 | import static org.junit.Assert.assertTrue; 15 | 16 | /** 17 | */ 18 | public class PolygonFeatureReaderTest extends MapredFS 19 | { 20 | @Test 21 | public void testPolygonFeatureReader() throws IOException, URISyntaxException 22 | { 23 | final Path dbf = getPath("/testpolygon.dbf"); 24 | final Path shp = getPath("/testpolygon.shp"); 25 | final FileSplit fileSplit = getFileSplit(shp); 26 | final PolygonFeatureInputFormat inputFormat = new PolygonFeatureInputFormat(); 27 | final RecordReader recordReader = inputFormat.getRecordReader(fileSplit, m_jobConfig, null); 28 | final LongWritable key = recordReader.createKey(); 29 | final PolygonFeatureWritable value = recordReader.createValue(); 30 | assertTrue(recordReader.next(key, value)); 31 | assertPolygonValues(value); 32 | assertPolygonNameValue(value.attributes); 33 | assertFalse(recordReader.next(key, value)); 34 | recordReader.close(); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapred/PolygonReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapred; 2 | 3 | import com.esri.io.PolygonWritable; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.mapred.FileSplit; 7 | import org.apache.hadoop.mapred.RecordReader; 8 | import org.junit.Test; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | 13 | import static org.junit.Assert.assertFalse; 14 | import static org.junit.Assert.assertTrue; 15 | 16 | /** 17 | */ 18 | public class PolygonReaderTest extends MapredFS 19 | { 20 | @Test 21 | public void testPolygonReader() throws IOException, URISyntaxException 22 | { 23 | final Path shp = 
getPath("/testpolygon.shp"); 24 | final FileSplit fileSplit = getFileSplit(shp); 25 | final PolygonInputFormat inputFormat = new PolygonInputFormat(); 26 | final RecordReader recordReader = inputFormat.getRecordReader(fileSplit, m_jobConfig, null); 27 | final LongWritable key = recordReader.createKey(); 28 | final PolygonWritable value = recordReader.createValue(); 29 | assertTrue(recordReader.next(key, value)); 30 | assertPolygonValues(value); 31 | assertFalse(recordReader.next(key, value)); 32 | recordReader.close(); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapreduce/DBFReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import org.apache.hadoop.fs.Path; 4 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 5 | import org.apache.hadoop.mapreduce.TaskAttemptID; 6 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 7 | import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; 8 | import org.junit.Test; 9 | 10 | import java.io.IOException; 11 | import java.net.URISyntaxException; 12 | 13 | import static org.junit.Assert.assertEquals; 14 | import static org.junit.Assert.assertTrue; 15 | 16 | /** 17 | */ 18 | public class DBFReaderTest extends MapreduceFS 19 | { 20 | @Test 21 | public void testDBFReader() throws URISyntaxException, IOException, InterruptedException 22 | { 23 | final Path dbf = getPath("/testpoint.dbf"); 24 | final FileSplit fileSplit = getFileSplit(dbf); 25 | final TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(m_jobConfig, new TaskAttemptID()); 26 | final DBFRecordReader dbfRecordReader = new DBFRecordReader(fileSplit, taskAttemptContext); 27 | assertTrue(dbfRecordReader.nextKeyValue()); 28 | assertEquals(0L, dbfRecordReader.getCurrentKey().get()); 29 | assertAttributeKeys(dbfRecordReader.getCurrentValue()); 30 | dbfRecordReader.close(); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapreduce/MapreduceFS.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.test.MiniFS; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 6 | 7 | import java.io.IOException; 8 | 9 | /** 10 | */ 11 | public class MapreduceFS extends MiniFS 12 | { 13 | protected FileSplit getFileSplit(final Path dst) throws IOException 14 | { 15 | final long len = m_fileSystem.getFileStatus(dst).getLen(); 16 | return new FileSplit(dst, 0, len, null); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapreduce/PointFeatureReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.io.PointFeatureWritable; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | import org.apache.hadoop.mapreduce.TaskAttemptID; 9 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 10 | import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; 11 | import org.junit.Test; 12 | 13 | import java.io.IOException; 14 | import java.net.URISyntaxException; 15 | 16 | import static 
org.junit.Assert.assertEquals; 17 | import static org.junit.Assert.assertFalse; 18 | import static org.junit.Assert.assertTrue; 19 | 20 | /** 21 | */ 22 | public class PointFeatureReaderTest extends MapreduceFS 23 | { 24 | @Test 25 | public void testPointFeatureReader() throws URISyntaxException, IOException, InterruptedException 26 | { 27 | final Path dbf = getPath("/testpoint.dbf"); 28 | final Path shp = getPath("/testpoint.shp"); 29 | final FileSplit fileSplit = getFileSplit(shp); 30 | final PointFeatureInputFormat inputFormat = new PointFeatureInputFormat(); 31 | final TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(m_jobConfig, new TaskAttemptID()); 32 | final RecordReader recordReader = inputFormat.createRecordReader(fileSplit, taskAttemptContext); 33 | assertTrue(recordReader.nextKeyValue()); 34 | final LongWritable key = recordReader.getCurrentKey(); 35 | assertEquals(1L, key.get()); 36 | final PointFeatureWritable value = recordReader.getCurrentValue(); 37 | assertPointValues(value); 38 | assertAttributeKeys(value.attributes); 39 | assertFalse(recordReader.nextKeyValue()); 40 | recordReader.close(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapreduce/PointReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.io.PointWritable; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | import org.apache.hadoop.mapreduce.TaskAttemptID; 9 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 10 | import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; 11 | import org.junit.Test; 12 | 13 | import java.io.IOException; 14 | import java.net.URISyntaxException; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | import static org.junit.Assert.assertFalse; 18 | import static org.junit.Assert.assertTrue; 19 | 20 | /** 21 | */ 22 | public class PointReaderTest extends MapreduceFS 23 | { 24 | @Test 25 | public void testPointReader() throws IOException, URISyntaxException, InterruptedException 26 | { 27 | final Path shp = getPath("/testpoint.shp"); 28 | final FileSplit fileSplit = getFileSplit(shp); 29 | final PointInputFormat pointInputFormat = new PointInputFormat(); 30 | final TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(m_jobConfig, new TaskAttemptID()); 31 | final RecordReader recordReader = pointInputFormat.createRecordReader(fileSplit, taskAttemptContext); 32 | assertTrue(recordReader.nextKeyValue()); 33 | assertEquals(1L, recordReader.getCurrentKey().get()); 34 | assertPointValues(recordReader.getCurrentValue()); 35 | assertFalse(recordReader.nextKeyValue()); 36 | recordReader.close(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapreduce/PolygonFeatureReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.io.PolygonFeatureWritable; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | import org.apache.hadoop.mapreduce.TaskAttemptID; 9 | import 
org.apache.hadoop.mapreduce.lib.input.FileSplit; 10 | import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; 11 | import org.junit.Test; 12 | 13 | import java.io.IOException; 14 | import java.net.URISyntaxException; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | import static org.junit.Assert.assertFalse; 18 | import static org.junit.Assert.assertTrue; 19 | 20 | /** 21 | */ 22 | public class PolygonFeatureReaderTest extends MapreduceFS 23 | { 24 | @Test 25 | public void testPolygonFeatureReader() throws IOException, URISyntaxException, InterruptedException 26 | { 27 | final Path dbf = getPath("/testpolygon.dbf"); 28 | final Path shp = getPath("/testpolygon.shp"); 29 | final FileSplit fileSplit = getFileSplit(shp); 30 | final PolygonFeatureInputFormat pointInputFormat = new PolygonFeatureInputFormat(); 31 | final TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(m_jobConfig, new TaskAttemptID()); 32 | final RecordReader recordReader = pointInputFormat.createRecordReader(fileSplit, taskAttemptContext); 33 | assertTrue(recordReader.nextKeyValue()); 34 | assertEquals(1L, recordReader.getCurrentKey().get()); 35 | assertPolygonValues(recordReader.getCurrentValue()); 36 | assertPolygonNameValue(recordReader.getCurrentValue().attributes); 37 | assertFalse(recordReader.nextKeyValue()); 38 | recordReader.close(); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/com/esri/mapreduce/PolygonReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.mapreduce; 2 | 3 | import com.esri.io.PolygonWritable; 4 | import org.apache.hadoop.fs.Path; 5 | import org.apache.hadoop.io.LongWritable; 6 | import org.apache.hadoop.mapreduce.RecordReader; 7 | import org.apache.hadoop.mapreduce.TaskAttemptContext; 8 | import org.apache.hadoop.mapreduce.TaskAttemptID; 9 | import org.apache.hadoop.mapreduce.lib.input.FileSplit; 10 | import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; 11 | import org.junit.Test; 12 | 13 | import java.io.IOException; 14 | import java.net.URISyntaxException; 15 | 16 | import static org.junit.Assert.assertEquals; 17 | import static org.junit.Assert.assertFalse; 18 | import static org.junit.Assert.assertTrue; 19 | 20 | /** 21 | */ 22 | public class PolygonReaderTest extends MapreduceFS 23 | { 24 | @Test 25 | public void testPolygonReader() throws IOException, URISyntaxException, InterruptedException 26 | { 27 | final Path shp = getPath("/testpolygon.shp"); 28 | final FileSplit fileSplit = getFileSplit(shp); 29 | final PolygonInputFormat pointInputFormat = new PolygonInputFormat(); 30 | final TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(m_jobConfig, new TaskAttemptID()); 31 | final RecordReader recordReader = pointInputFormat.createRecordReader(fileSplit, taskAttemptContext); 32 | assertTrue(recordReader.nextKeyValue()); 33 | assertEquals(1L, recordReader.getCurrentKey().get()); 34 | assertPolygonValues(recordReader.getCurrentValue()); 35 | assertFalse(recordReader.nextKeyValue()); 36 | recordReader.close(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/test/java/com/esri/shp/PointTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.shp; 2 | 3 | import com.esri.core.geometry.Point; 4 | import org.junit.Test; 5 | 6 | import java.io.DataInputStream; 7 | import java.io.IOException; 8 | 
import java.io.InputStream; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | import static org.junit.Assert.assertNotNull; 12 | import static org.junit.Assert.assertTrue; 13 | 14 | /** 15 | */ 16 | public class PointTest 17 | { 18 | @Test 19 | public void testReadPoint() throws IOException 20 | { 21 | final InputStream inputStream = this.getClass().getResourceAsStream("/testpoint.shp"); 22 | assertNotNull(inputStream); 23 | try 24 | { 25 | // One point shapefile - so extent is the point 26 | final ShpReader shpReader = new ShpReader(new DataInputStream(inputStream)); 27 | final ShpHeader shpHeader = shpReader.getHeader(); 28 | assertTrue(shpReader.hasMore()); 29 | final Point point = shpReader.readPoint(); 30 | assertEquals(shpHeader.xmin, point.getX(), 0.000001); 31 | assertEquals(shpHeader.ymin, point.getY(), 0.000001); 32 | } 33 | finally 34 | { 35 | inputStream.close(); 36 | } 37 | } 38 | 39 | @Test 40 | public void testQueryPoint() throws IOException 41 | { 42 | final InputStream inputStream = this.getClass().getResourceAsStream("/testpoint.shp"); 43 | assertNotNull(inputStream); 44 | try 45 | { 46 | // One point shapefile - so extent is the point 47 | final ShpReader shpReader = new ShpReader(new DataInputStream(inputStream)); 48 | final ShpHeader shpHeader = shpReader.getHeader(); 49 | assertEquals(1, shpHeader.shapeType); 50 | assertTrue(shpReader.hasMore()); 51 | final Point point = new Point(); 52 | shpReader.queryPoint(point); 53 | assertEquals(shpHeader.xmin, point.getX(), 0.000001); 54 | assertEquals(shpHeader.ymin, point.getY(), 0.000001); 55 | } 56 | finally 57 | { 58 | inputStream.close(); 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/test/java/com/esri/shp/PolygonTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.shp; 2 | 3 | import com.esri.core.geometry.Envelope2D; 4 | import com.esri.core.geometry.Polygon; 5 | import org.junit.Test; 6 | 7 | import java.io.DataInputStream; 8 | import java.io.IOException; 9 | import java.io.InputStream; 10 | 11 | import static org.junit.Assert.assertEquals; 12 | import static org.junit.Assert.assertNotNull; 13 | import static org.junit.Assert.assertTrue; 14 | 15 | /** 16 | */ 17 | public class PolygonTest 18 | { 19 | @Test 20 | public void testReadPolygon() throws IOException 21 | { 22 | final InputStream inputStream = this.getClass().getResourceAsStream("/testpolygon.shp"); 23 | assertNotNull(inputStream); 24 | try 25 | { 26 | final ShpReader shpReader = new ShpReader(new DataInputStream(inputStream)); 27 | final ShpHeader shpHeader = shpReader.getHeader(); 28 | assertTrue(shpReader.hasMore()); 29 | final Polygon polygon = shpReader.readPolygon(); 30 | final Envelope2D enveloper2D = new Envelope2D(); 31 | polygon.queryEnvelope2D(enveloper2D); 32 | assertEquals(shpHeader.xmin, enveloper2D.xmin, 0.000001); 33 | assertEquals(shpHeader.ymin, enveloper2D.ymin, 0.000001); 34 | assertEquals(shpHeader.xmax, enveloper2D.xmax, 0.000001); 35 | assertEquals(shpHeader.ymax, enveloper2D.ymax, 0.000001); 36 | } 37 | finally 38 | { 39 | inputStream.close(); 40 | } 41 | } 42 | 43 | @Test 44 | public void testQueryPolygon() throws IOException 45 | { 46 | final InputStream inputStream = this.getClass().getResourceAsStream("/testpolygon.shp"); 47 | assertNotNull(inputStream); 48 | try 49 | { 50 | final ShpReader shpReader = new ShpReader(new DataInputStream(inputStream)); 51 | final ShpHeader shpHeader = 
shpReader.getHeader(); 52 | assertEquals(5, shpHeader.shapeType); 53 | assertTrue(shpReader.hasMore()); 54 | final Polygon polygon = new Polygon(); 55 | shpReader.queryPolygon(polygon); 56 | final Envelope2D enveloper2D = new Envelope2D(); 57 | polygon.queryEnvelope2D(enveloper2D); 58 | assertEquals(shpHeader.xmin, enveloper2D.xmin, 0.000001); 59 | assertEquals(shpHeader.ymin, enveloper2D.ymin, 0.000001); 60 | assertEquals(shpHeader.xmax, enveloper2D.xmax, 0.000001); 61 | assertEquals(shpHeader.ymax, enveloper2D.ymax, 0.000001); 62 | } 63 | finally 64 | { 65 | inputStream.close(); 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/test/java/com/esri/shp/PolylineMTest.java: -------------------------------------------------------------------------------- 1 | package com.esri.shp; 2 | 3 | import com.esri.io.PolylineMWritable; 4 | import org.junit.Test; 5 | 6 | import java.io.DataInputStream; 7 | import java.io.IOException; 8 | import java.io.InputStream; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | import static org.junit.Assert.assertNotNull; 12 | import static org.junit.Assert.assertTrue; 13 | 14 | /** 15 | */ 16 | public class PolylineMTest 17 | { 18 | @Test 19 | public void testReadPolygon() throws IOException 20 | { 21 | final InputStream inputStream = this.getClass() 22 | .getResourceAsStream("/testpolylinem.shp"); 23 | assertNotNull(inputStream); 24 | try 25 | { 26 | final ShpReader shpReader = new ShpReader(new DataInputStream(inputStream)); 27 | final ShpHeader shpHeader = shpReader.getHeader(); 28 | 29 | assertEquals(10.0, shpHeader.mmin, 0.000001); 30 | assertEquals(20.0, shpHeader.mmax, 0.000001); 31 | 32 | assertTrue(shpReader.hasMore()); 33 | final PolylineMWritable polylineM = shpReader.readPolylineMWritable(); 34 | 35 | assertNotNull(polylineM.lens); 36 | assertEquals(1, polylineM.lens.length); 37 | assertEquals(2, polylineM.lens[0]); 38 | 39 | assertNotNull(polylineM.x); 40 | assertNotNull(polylineM.y); 41 | assertNotNull(polylineM.m); 42 | 43 | assertEquals(2, polylineM.x.length); 44 | assertEquals(2, polylineM.y.length); 45 | assertEquals(2, polylineM.m.length); 46 | 47 | assertEquals(0.0, polylineM.x[0], 0.000001); 48 | assertEquals(100.0, polylineM.x[1], 0.000001); 49 | 50 | assertEquals(0.0, polylineM.y[0], 0.000001); 51 | assertEquals(200.0, polylineM.y[1], 0.000001); 52 | 53 | assertEquals(10.0, polylineM.m[0], 0.000001); 54 | assertEquals(20.0, polylineM.m[1], 0.000001); 55 | 56 | } 57 | finally 58 | { 59 | inputStream.close(); 60 | } 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/test/java/com/esri/test/MiniFS.java: -------------------------------------------------------------------------------- 1 | package com.esri.test; 2 | 3 | import com.esri.core.geometry.Envelope; 4 | import com.esri.core.geometry.Point; 5 | import com.esri.core.geometry.Polygon; 6 | import com.esri.io.PointWritable; 7 | import com.esri.io.PolygonWritable; 8 | import org.apache.commons.io.FileUtils; 9 | import org.apache.hadoop.conf.Configuration; 10 | import org.apache.hadoop.fs.FileSystem; 11 | import org.apache.hadoop.fs.Path; 12 | import org.apache.hadoop.hdfs.MiniDFSCluster; 13 | import org.apache.hadoop.io.MapWritable; 14 | import org.apache.hadoop.io.Text; 15 | import org.apache.hadoop.mapred.JobConf; 16 | import org.apache.log4j.Logger; 17 | import org.junit.After; 18 | import org.junit.Before; 19 | 20 | import java.io.File; 21 | import 
22 | import java.net.URISyntaxException;
23 | import java.net.URL;
24 | 
25 | import static org.apache.log4j.Level.ERROR;
26 | import static org.junit.Assert.assertEquals;
27 | import static org.junit.Assert.assertTrue;
28 | 
29 | /**
30 |  */
31 | public class MiniFS
32 | {
33 |     protected FileSystem m_fileSystem;
34 |     protected JobConf m_jobConfig;
35 |     protected MiniDFSCluster m_dfsCluster;
36 | 
37 |     public void setupMetricsLogging()
38 |     {
39 |         Logger.getLogger(org.apache.hadoop.metrics2.util.MBeans.class).setLevel(ERROR);
40 |         Logger.getLogger(org.apache.hadoop.metrics2.impl.MetricsSystemImpl.class).setLevel(ERROR);
41 |     }
42 | 
43 |     @Before
44 |     public void setUp() throws Exception
45 |     {
46 |         setupMetricsLogging();
47 | 
48 |         final File tmpDir = File.createTempFile("dfs", "");
49 | 
50 |         final Configuration config = new Configuration();
51 |         config.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());
52 |         config.setBoolean("dfs.permissions", false);
53 |         config.setInt("dfs.replication", 1);
54 |         config.set("dfs.datanode.data.dir.perm", "777");
55 | 
56 |         if (tmpDir.exists())
57 |         {
58 |             FileUtils.forceDelete(tmpDir);
59 |         }
60 |         FileUtils.forceMkdir(tmpDir);
61 | 
62 |         // used by MiniDFSCluster for DFS storage
63 |         System.setProperty("test.build.data", new File(tmpDir, "data").getAbsolutePath());
64 | 
65 |         // required by JobHistory.initLogDir
66 |         System.setProperty("hadoop.log.dir", new File(tmpDir, "logs").getAbsolutePath());
67 | 
68 |         m_jobConfig = new JobConf(config);
69 |         m_dfsCluster = new MiniDFSCluster.Builder(m_jobConfig).numDataNodes(1).format(true).build();
70 |         m_fileSystem = m_dfsCluster.getFileSystem();
71 |         m_dfsCluster.waitClusterUp();
72 |     }
73 | 
74 |     @After
75 |     public void tearDown() throws Exception
76 |     {
77 |         m_dfsCluster.shutdown();
78 |     }
79 | 
80 |     protected void assertPointValues(final PointWritable value)
81 |     {
82 |         final Point point = value.point;
83 |         assertEquals(-99.79634094297234, point.getX(), 0.000001);
84 |         assertEquals(39.486310278100405, point.getY(), 0.000001);
85 |     }
86 | 
87 |     protected void assertPolygonValues(final PolygonWritable value)
88 |     {
89 |         final Polygon polygon = value.polygon;
90 |         final Envelope env = new Envelope();
91 |         polygon.queryEnvelope(env);
92 |         assertEquals(-118.45964524998351, env.getXMin(), 0.000001);
93 |         assertEquals(37.038663811607194, env.getYMin(), 0.000001);
94 |         assertEquals(-104.84461178011469, env.getXMax(), 0.000001);
95 |         assertEquals(44.76404797147654, env.getYMax(), 0.000001);
96 |     }
97 | 
98 |     protected void assertAttributeKeys(final MapWritable value)
99 |     {
100 |         assertTrue(value.containsKey(new Text("AShort")));
101 |         assertTrue(value.containsKey(new Text("ALong")));
102 |         assertTrue(value.containsKey(new Text("AFloat")));
103 |         assertTrue(value.containsKey(new Text("ANume106")));
104 |         assertTrue(value.containsKey(new Text("AText50")));
105 |         assertTrue(value.containsKey(new Text("ADate")));
106 |     }
107 | 
108 |     protected Path getPath(final String name) throws IOException, URISyntaxException
109 |     {
110 |         final URL resource = getClass().getResource(name);
111 |         final Path path = new Path(name);
112 |         m_fileSystem.copyFromLocalFile(new Path(resource.toURI()), path);
113 |         return path;
114 |     }
115 | 
116 |     protected void assertPolygonNameValue(final MapWritable attributes)
117 |     {
118 |         assertTrue(attributes.containsKey(new Text("Name")));
119 |         assertEquals(new Text("FooBar"), new Text(attributes.get(new Text("Name")).toString().trim()));
120 |     }
121 | }
122 | 
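A minimal sketch of how a test might build on the `MiniFS` base class above, assuming the `mapred` `PointInputFormat` has a no-arg constructor, yields `LongWritable` keys and `PointWritable` values, and produces a single split for a shapefile; the class name `PointReaderSketch` is hypothetical and not a file in this repository:

```
package com.esri.mapred;

import com.esri.io.PointWritable;
import com.esri.test.MiniFS;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

public class PointReaderSketch extends MiniFS
{
    @Test
    public void testReadPointFromHDFS() throws Exception
    {
        // Copy the classpath resource into the MiniDFSCluster and register it as the job input.
        FileInputFormat.setInputPaths(m_jobConfig, getPath("/testpoint.shp"));

        final PointInputFormat inputFormat = new PointInputFormat();

        // Assuming the shapefile is read as a single, non-splittable unit, one split is expected.
        final InputSplit[] splits = inputFormat.getSplits(m_jobConfig, 1);
        assertEquals(1, splits.length);

        final RecordReader<LongWritable, PointWritable> reader =
                inputFormat.getRecordReader(splits[0], m_jobConfig, Reporter.NULL);
        try
        {
            final LongWritable key = reader.createKey();
            final PointWritable value = reader.createValue();
            assertTrue(reader.next(key, value));
            assertPointValues(value); // helper defined in MiniFS above
        }
        finally
        {
            reader.close();
        }
    }
}
```

The same pattern would apply to the polygon and feature input formats, swapping in the corresponding writable type.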
--------------------------------------------------------------------------------
/src/test/python/CreatePolylineM.py:
--------------------------------------------------------------------------------
1 | import arcpy
2 | 
3 | 
4 | class CreatePolylineM(object):
5 |     def __init__(self):
6 |         self.label = "Create Tool"
7 |         self.description = ""
8 |         self.canRunInBackground = False
9 | 
10 |     def getParameterInfo(self):
11 |         paramFC = arcpy.Parameter(
12 |             name="links",
13 |             displayName="links",
14 |             direction="Output",
15 |             datatype="DEFeatureClass",
16 |             parameterType="Derived")
17 |         return [paramFC]
18 | 
19 |     def isLicensed(self):
20 |         return True
21 | 
22 |     def updateParameters(self, parameters):
23 |         return
24 | 
25 |     def updateMessages(self, parameters):
26 |         return
27 | 
28 |     def execute(self, parameters, messages):
29 |         fc = "in_memory/Links"
30 |         if arcpy.Exists(fc):
31 |             arcpy.management.Delete(fc)
32 |         spref = arcpy.SpatialReference(102100)
33 |         arcpy.management.CreateFeatureclass("in_memory", "Links", "POLYLINE",
34 |                                             spatial_reference=spref,
35 |                                             has_m="ENABLED",
36 |                                             has_z="DISABLED")
37 | 
38 |         cursor = arcpy.da.InsertCursor(fc, ['SHAPE@'])
39 | 
40 |         a = arcpy.Array()
41 |         a.add(arcpy.Point(0, 0, 0, 10))
42 |         a.add(arcpy.Point(100, 200, 0, 20))
43 |         polylineM = arcpy.Polyline(a)
44 |         cursor.insertRow([polylineM])
45 | 
46 |         parameters[0].value = fc
47 |         return
48 | 
--------------------------------------------------------------------------------
/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootCategory=ERROR, stdout
2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
3 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
4 | log4j.appender.stdout.target=System.err
5 | log4j.appender.stdout.layout.ConversionPattern=%5p %m%n
6 | 
--------------------------------------------------------------------------------
/src/test/resources/testpoint.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mraad/Shapefile/cabbd189576c0cfa5d9b689d71213d5d44d63d1a/src/test/resources/testpoint.dbf
--------------------------------------------------------------------------------
/src/test/resources/testpoint.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mraad/Shapefile/cabbd189576c0cfa5d9b689d71213d5d44d63d1a/src/test/resources/testpoint.shp
--------------------------------------------------------------------------------
/src/test/resources/testpolygon.dbf:
--------------------------------------------------------------------------------
1 | rA=WNameC< FooBar 
--------------------------------------------------------------------------------
/src/test/resources/testpolygon.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mraad/Shapefile/cabbd189576c0cfa5d9b689d71213d5d44d63d1a/src/test/resources/testpolygon.shp
--------------------------------------------------------------------------------
/src/test/resources/testpolylinem.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mraad/Shapefile/cabbd189576c0cfa5d9b689d71213d5d44d63d1a/src/test/resources/testpolylinem.shp
--------------------------------------------------------------------------------