├── .gitignore ├── LICENSE ├── README.md ├── media └── TestToolbox.png ├── pom.xml └── src ├── main ├── python │ ├── com │ │ ├── __init__.py │ │ └── esri │ │ │ ├── __init__.py │ │ │ └── udt │ │ │ └── __init__.py │ ├── udtapp.py │ └── udtapp.sh ├── resources │ ├── log4j.properties │ └── logback.xml └── scala │ └── com │ └── esri │ ├── gdb │ ├── CatRow.scala │ ├── DataBuffer.scala │ ├── DefaultSource.scala │ ├── EsriFieldType.scala │ ├── Field.scala │ ├── FieldBinary.scala │ ├── FieldBytes.scala │ ├── FieldDateTime.scala │ ├── FieldFloat32.scala │ ├── FieldFloat64.scala │ ├── FieldGeom.scala │ ├── FieldGeomNoop.scala │ ├── FieldInt16.scala │ ├── FieldInt32.scala │ ├── FieldOID.scala │ ├── FieldPointMType.scala │ ├── FieldPointType.scala │ ├── FieldPointZMType.scala │ ├── FieldPointZType.scala │ ├── FieldPoly.scala │ ├── FieldPoly2Type.scala │ ├── FieldPoly3Type.scala │ ├── FieldPolygon.scala │ ├── FieldPolygonType.scala │ ├── FieldPolyline.scala │ ├── FieldPolylineMType.scala │ ├── FieldPolylineType.scala │ ├── FieldString.scala │ ├── FieldUUID.scala │ ├── GDBIndex.scala │ ├── GDBRDD.scala │ ├── GDBRelation.scala │ ├── GDBRowIterator.scala │ ├── GDBTable.scala │ ├── GDBTableScanWithNullValues.scala │ ├── GDBTableSeekWithNoNullValues.scala │ ├── GDBTableSeekWithNullValues.scala │ ├── IndexInfo.scala │ └── package.scala │ └── udt │ ├── PointMType.scala │ ├── PointMUDT.scala │ ├── PointType.scala │ ├── PointUDT.scala │ ├── PointZMType.scala │ ├── PointZMUDT.scala │ ├── PointZType.scala │ ├── PointZUDT.scala │ ├── PolyType.scala │ ├── PolyUDT.scala │ ├── PolygonType.scala │ ├── PolygonUDT.scala │ ├── PolylineMType.scala │ ├── PolylineMUDT.scala │ ├── PolylineType.scala │ ├── PolylineUDT.scala │ ├── Segment.scala │ ├── SegmentIterator.scala │ └── SpatialType.scala └── test ├── python └── TestToolbox.pyt ├── resources ├── Test.gdb │ ├── a00000001.TablesByName.atx │ ├── a00000001.freelist │ ├── a00000001.gdbindexes │ ├── a00000001.gdbtable │ ├── a00000001.gdbtablx │ ├── a00000002.gdbtable │ ├── a00000002.gdbtablx │ ├── a00000003.gdbindexes │ ├── a00000003.gdbtable │ ├── a00000003.gdbtablx │ ├── a00000004.CatItemsByPhysicalName.atx │ ├── a00000004.CatItemsByType.atx │ ├── a00000004.FDO_UUID.atx │ ├── a00000004.freelist │ ├── a00000004.gdbindexes │ ├── a00000004.gdbtable │ ├── a00000004.gdbtablx │ ├── a00000004.spx │ ├── a00000005.CatItemTypesByName.atx │ ├── a00000005.CatItemTypesByParentTypeID.atx │ ├── a00000005.CatItemTypesByUUID.atx │ ├── a00000005.gdbindexes │ ├── a00000005.gdbtable │ ├── a00000005.gdbtablx │ ├── a00000006.CatRelsByDestinationID.atx │ ├── a00000006.CatRelsByOriginID.atx │ ├── a00000006.CatRelsByType.atx │ ├── a00000006.FDO_UUID.atx │ ├── a00000006.freelist │ ├── a00000006.gdbindexes │ ├── a00000006.gdbtable │ ├── a00000006.gdbtablx │ ├── a00000007.CatRelTypesByBackwardLabel.atx │ ├── a00000007.CatRelTypesByDestItemTypeID.atx │ ├── a00000007.CatRelTypesByForwardLabel.atx │ ├── a00000007.CatRelTypesByName.atx │ ├── a00000007.CatRelTypesByOriginItemTypeID.atx │ ├── a00000007.CatRelTypesByUUID.atx │ ├── a00000007.gdbindexes │ ├── a00000007.gdbtable │ ├── a00000007.gdbtablx │ ├── a00000009.gdbindexes │ ├── a00000009.gdbtable │ ├── a00000009.gdbtablx │ ├── a00000009.spx │ ├── a0000000a.gdbindexes │ ├── a0000000a.gdbtable │ ├── a0000000a.gdbtablx │ ├── a0000000a.spx │ ├── a0000000d.gdbindexes │ ├── a0000000d.gdbtable │ ├── a0000000d.gdbtablx │ ├── a0000000d.spx │ ├── a0000000e.gdbindexes │ ├── a0000000e.gdbtable │ ├── a0000000e.gdbtablx │ ├── a0000000e.spx │ ├── a0000000f.gdbindexes │ 
├── a0000000f.gdbtable │ ├── a0000000f.gdbtablx │ ├── a0000000f.spx │ ├── a00000010.gdbindexes │ ├── a00000010.gdbtable │ ├── a00000010.gdbtablx │ ├── a00000010.spx │ ├── a00000011.gdbindexes │ ├── a00000011.gdbtable │ ├── a00000011.gdbtablx │ ├── a00000011.spx │ ├── a00000012.gdbindexes │ ├── a00000012.gdbtable │ ├── a00000012.gdbtablx │ ├── a00000012.spx │ ├── gdb │ └── timestamps └── log4j.properties └── scala └── com └── esri ├── app ├── GDBApp.scala └── Main.scala ├── gdb └── GDBSuite.scala └── udt ├── SegmentIteratorSpec.scala └── SerDeSpec.scala
/.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | metastore_db/ 4 | project/ 5 | src/test/python/*.xml 6 | *.iml 7 | ss-*.sh 8 | dependency-reduced-pom.xml 9 | derby.log 10 | --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # Spark GDB Library 2 | 3 | A library for parsing and querying an [Esri File Geodatabase](http://www.esri.com/news/arcuser/0309/files/9reasons.pdf) with Apache Spark. 4 | 5 | This work in progress is a pure Scala read-only implementation based on [this](https://github.com/rouault/dump_gdbtable/wiki/FGDB-Spec) reverse-engineered specification. 6 | Understanding the internal file structure enables partitioning to perform massive parallel reading. 7 | The reading API is based on the [Hadoop File System API](https://hadoop.apache.org/docs/r2.7.1/api/index.html?org/apache/hadoop/fs/FileSystem.html), enabling the placement of the GDB in HDFS or S3 (not tested) for multi-node access. 8 | There is still a lot to be done, but it is a good start. Eventually, I will merge this project with my [Ibn Battuta Project](https://github.com/mraad/ibn-battuta). 9 | 10 | ### TODO (in no particular order) 11 | 12 | Update (Jan 7, 2016) - for Polylines and Polygons, I'm mimicking the core Spark Vector class. In a way, they have almost the same semantics. 13 | 14 | * Implement asGeometry 15 | * ~~Use [Esri Geometry Library](https://github.com/Esri/geometry-api-java) rather than JTS (I love JTS, so many utility functions on the geometry model)~~ 16 | * Implement ~~Point~~, ~~Polyline~~ and ~~Polygon~~ as Spatial Types using the UDT spec. 17 | * Handle more shapes - multiXXX and with Z and M 18 | * Read default values in field definitions 19 | * Register custom [Kryo](https://github.com/EsotericSoftware/kryo) serializer for shapes (optimization - but worth it :-) 20 | * Perform a scan rather than a seek if the index row count is the same as the table count (should help performance) 21 | * Test multi-part geometries 22 | * Test XML field type 23 | * Test Blob field type 24 | * Handle Raster (super low priority) 25 | * Make internal structure of Shapes more like [GeoJSON](http://geojson.org/geojson-spec.html) but it will be heavy for SerDe! 26 | 27 | ## Building From Source 28 | 29 | This project's build process is based on [Apache Maven](https://maven.apache.org/). 30 | 31 | ```bash 32 | export SPARK_LOCAL_IP=localhost 33 | mvn install 34 | ``` 35 | 36 | The test data in `src/test/resources/Test.gdb` was generated using the ArcPy tool `src/test/python/TestToolbox.pyt`. 37 | 38 | ![](media/TestToolbox.png) 39 | 40 | Though the coordinates of the shapes are random, the coordinate values are placed as attributes for testing. 
41 | In the case of the `Points` feature class, the x/y coordinate values should match the values in the attributes `X` and `Y` enabling cross checking during testing. 42 | 43 | ## Using with Spark shell 44 | 45 | ```bash 46 | $SPARK_HOME/bin/spark-shell --packages com.esri:spark-gdb:0.7 47 | ``` 48 | 49 | ```scala 50 | import com.esri.gdb._ 51 | import com.esri.udt._ 52 | sc.gdbFile("src/test/resources/Test.gdb", "Points", numPartitions = 2).map(row => { 53 | row.getAs[PointType](row.fieldIndex("Shape")) 54 | }).foreach(println) 55 | ``` 56 | 57 | ```scala 58 | val df = sqlContext.read. 59 | format("com.esri.gdb"). 60 | option("path", "src/test/resources/Test.gdb"). 61 | option("name", "Points"). 62 | load() 63 | df.printSchema() 64 | df.registerTempTable("points") 65 | sqlContext.sql(s"select * from points").show() 66 | ``` 67 | 68 | ### Using UDT and UDFs 69 | 70 | For the Spatial UDT (User Defined Types), I am following the `VectorUDT` implementation. 71 | 72 | In Scala: 73 | 74 | ```scala 75 | val df = sqlContext.read.format("com.esri.gdb") 76 | .option("path", path) 77 | .option("name", name) 78 | .option("numPartitions", "1") 79 | .load() 80 | 81 | df.printSchema() 82 | df.registerTempTable(name) 83 | 84 | sqlContext.udf.register("getX", (point: PointType) => point.x) 85 | sqlContext.udf.register("getY", (point: PointType) => point.y) 86 | sqlContext.udf.register("plus2", (point: PointType) => PointType(point.x + 2, point.y + 2)) 87 | 88 | sqlContext.sql(s"select getX(plus2(Shape)),getX(Shape) as y from $name") 89 | .show(20) 90 | ``` 91 | 92 | In Python: 93 | 94 | ```python 95 | df = sqlContext.read \ 96 | .format("com.esri.gdb") \ 97 | .options(path="../../test/resources/Test.gdb", name=gdb_name, numPartitions="1") \ 98 | .load() 99 | 100 | df.printSchema() 101 | 102 | df.registerTempTable(gdb_name) 103 | 104 | sqlContext.registerFunction("getX", lambda p: p.x, DoubleType()) 105 | sqlContext.registerFunction("getY", lambda p: p.y, DoubleType()) 106 | sqlContext.registerFunction("plus2", lambda p: PointType(p.x + 2, p.y + 2), PointUDT()) 107 | 108 | rows = sqlContext.sql("select plus2(Shape),X,Y from {}".format(gdb_name)) 109 | for row in rows.collect(): 110 | print row 111 | ``` 112 | 113 | ## Testing In HDFS (Yet Another Excuse To Use Docker :-) 114 | 115 | We will use [Docker](https://www.docker.com/) to bootstrap a [Cloudera quickstart](https://www.cloudera.com/content/www/en-us/documentation/enterprise/latest/topics/quickstart_docker_container.html) container instance. 116 | 117 | I **highly** recommend the installation of the [Docker Toolbox](https://www.docker.com/docker-toolbox) for a Docker quick start. 118 | 119 | Compile the project with the `quickstart` profile: 120 | ```bash 121 | mvn -Pquickstart clean package 122 | ``` 123 | 124 | Create a local docker enabled machine using 6 cores, 8GB of RAM and with 10 GB of virtual disk: 125 | ```bash 126 | docker-machine create\ 127 | --driver virtualbox\ 128 | --virtualbox-cpu-count 6\ 129 | --virtualbox-memory 8192\ 130 | --virtualbox-disk-size 10240\ 131 | --virtualbox-no-vtx-check\ 132 | quickstart 133 | ``` 134 | 135 | On Windows, I had to upgrade my machine instance: 136 | ```bash 137 | docker-machine upgrade quickstart 138 | ``` 139 | 140 | Set up the docker environment: 141 | ```bash 142 | eval $(docker-machine env quickstart) 143 | ``` 144 | 145 | Start a single node Hadoop instance with ZooKeeper, HDFS, YARN and Spark. 
146 | (This is a **4GB** download, so go grab some coffee and walk your dog, it is gonna take a while ! But, you only have to do that once. Ah... the beauty of docker images :-) 147 | We expose the ports for Cloudera Manager (7180), Hue (8888) and NameNode (50070). 148 | And to facilitate the moving of jars and test data from the host into the container, we map the `/Users` host folder onto the container `/Users` folder. 149 | ```bash 150 | docker run\ 151 | --privileged=true\ 152 | --hostname=quickstart.cloudera\ 153 | -v /Users:/Users\ 154 | -p 7180:7180\ 155 | -p 8888:8888\ 156 | -p 50070:50070\ 157 | -p 50075:50075\ 158 | -p 21050:21050\ 159 | -t -i cloudera/quickstart:latest\ 160 | /usr/bin/docker-quickstart 161 | ``` 162 | 163 | Copy `Test.gdb` to HDFS: 164 | 165 | ```bash 166 | hadoop fs -mkdir /data 167 | hadoop fs -put /Users//spark-gdb/src/test/resources/Test.gdb /data 168 | ``` 169 | 170 | Start A Spark shell: 171 | ```bash 172 | spark-shell --jars /Users//spark-gdb/target/spark-gdb-0.7.jar 173 | ``` 174 | 175 | Submit a Spark Context job: 176 | ```scala 177 | import com.esri.gdb._ 178 | import com.esri.udt.PointType 179 | sc.gdbFile("hdfs:///data/Test.gdb", "Points", numPartitions = 2).map(row => { 180 | row.getAs[PointType](row.fieldIndex("Shape")) 181 | }).foreach(println) 182 | ``` 183 | 184 | Submit a SQL Context job: 185 | ```scala 186 | val df = sqlContext.read. 187 | format("com.esri.gdb"). 188 | option("path", "hdfs:///data/Test.gdb"). 189 | option("name", "Lines"). 190 | option("numPartitions", "2"). 191 | load() 192 | df.registerTempTable("lines") 193 | sqlContext.sql("select * from lines").show() 194 | ``` 195 | 196 | #### Notes to self 197 | 198 | set terminal type in windows to enable cursor movement: 199 | ``` 200 | set term=ansi 201 | ``` 202 | 203 | Start a CM instance: 204 | ``` 205 | /home/cloudera/cloudera-manager --express 206 | ``` 207 | -------------------------------------------------------------------------------- /media/TestToolbox.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/media/TestToolbox.png -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | com.esri 7 | spark-gdb 8 | 0.7 9 | jar 10 | 11 | 12 | UTF-8 13 | 2.10 14 | ${scala.compact}.6 15 | 16 | 17 | 18 | 19 | cloudera-releases 20 | https://repository.cloudera.com/artifactory/cloudera-repos 21 | 22 | false 23 | 24 | 25 | 26 | scala-tools.org 27 | https://oss.sonatype.org/content/groups/scala-tools/ 28 | 29 | false 30 | 31 | 32 | 33 | 34 | 35 | 36 | scala-tools.org 37 | https://oss.sonatype.org/content/groups/scala-tools/ 38 | 39 | 40 | 41 | 42 | 43 | com.esri.geometry 44 | esri-geometry-api 45 | 1.2.1 46 | 47 | 48 | org.codehaus.jackson 49 | jackson-core-asl 50 | 51 | 52 | org.json 53 | json 54 | 55 | 56 | 57 | 65 | 66 | org.apache.spark 67 | spark-sql_${scala.compact} 68 | ${spark.version} 69 | ${spark.scope} 70 | 71 | 72 | joda-time 73 | joda-time 74 | 2.9.4 75 | provided 76 | 77 | 78 | org.joda 79 | joda-convert 80 | 1.8.1 81 | provided 82 | 83 | 84 | com.jsuereth 85 | scala-arm_${scala.compact} 86 | 1.4 87 | test 88 | 89 | 90 | org.scalatest 91 | scalatest_${scala.compact} 92 | 2.2.6 93 | test 94 | 95 | 96 | 97 | 98 | 101 | 102 | 103 | 104 | net.alchim31.maven 105 | scala-maven-plugin 106 | 3.2.2 107 | 108 | 109 | 110 | 
111 | 112 | net.alchim31.maven 113 | scala-maven-plugin 114 | 3.2.2 115 | 116 | 117 | scala-compile-first 118 | process-resources 119 | 120 | compile 121 | 122 | 123 | 124 | scala-test-compile-first 125 | process-test-resources 126 | 127 | testCompile 128 | 129 | 130 | 131 | 132 | ${scala.compact} 133 | ${scala.version} 134 | incremental 135 | false 136 | 137 | 138 | 139 | org.apache.maven.plugins 140 | maven-compiler-plugin 141 | 3.5.1 142 | 143 | ${java.version} 144 | ${java.version} 145 | 146 | 147 | 148 | org.apache.maven.plugins 149 | maven-surefire-plugin 150 | 2.19.1 151 | 152 | true 153 | 154 | 155 | 156 | org.scalatest 157 | scalatest-maven-plugin 158 | 1.0 159 | 160 | ${project.build.directory}/surefire-reports 161 | . 162 | TestSuite.txt 163 | 164 | 165 | 166 | test 167 | 168 | test 169 | 170 | 171 | 172 | 173 | 174 | org.apache.maven.plugins 175 | maven-shade-plugin 176 | 2.4.3 177 | 178 | 179 | package 180 | 181 | shade 182 | 183 | 184 | 185 | 187 | 189 | reference.conf 190 | 191 | 197 | 198 | 199 | 200 | *:* 201 | 202 | META-INF/*.SF 203 | META-INF/*.DSA 204 | META-INF/*.RSA 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | spark-1.6 218 | 219 | true 220 | 221 | 222 | provided 223 | 1.6.2 224 | 1.8 225 | 226 | 227 | 228 | spark-1.4 229 | 230 | provided 231 | 1.4.1 232 | 1.7 233 | 234 | 235 | 236 | intellij 237 | 238 | compile 239 | 1.6.2 240 | 1.8 241 | 242 | 243 | 244 | quickstart 245 | 246 | provided 247 | 1.5.0-cdh5.5.0 248 | 1.7 249 | 250 | 251 | 252 | org.xerial.snappy 253 | snappy-java 254 | 1.1.2 255 | test 256 | 257 | 258 | 259 | 260 | 261 | 262 | -------------------------------------------------------------------------------- /src/main/python/com/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/main/python/com/__init__.py -------------------------------------------------------------------------------- /src/main/python/com/esri/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/main/python/com/esri/__init__.py -------------------------------------------------------------------------------- /src/main/python/com/esri/udt/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = ['PointType', 'PointZType', 'PointMType', 'PointZMType', 'PolylineType', 'PolygonType'] 2 | 3 | import array 4 | import sys 5 | from pyspark.sql.types import UserDefinedType, StructField, StructType, DoubleType, IntegerType, ArrayType 6 | 7 | # 8 | # Copied from Spark VectorUDT 9 | # 10 | if sys.version >= '3': 11 | basestring = str 12 | xrange = range 13 | import copyreg as copy_reg 14 | 15 | long = int 16 | else: 17 | import copy_reg 18 | 19 | if sys.version_info[:2] == (2, 7): 20 | # speed up pickling array in Python 2.7 21 | def fast_pickle_array(ar): 22 | return array.array, (ar.typecode, ar.tostring()) 23 | 24 | 25 | copy_reg.pickle(array.array, fast_pickle_array) 26 | 27 | 28 | class PointZUDT(UserDefinedType): 29 | """ 30 | SQL user-defined type (UDT) for PointZ. 
31 | """ 32 | 33 | @classmethod 34 | def sqlType(self): 35 | return StructType([ 36 | StructField("x", DoubleType(), False), 37 | StructField("y", DoubleType(), False), 38 | StructField("z", DoubleType(), False) 39 | ]) 40 | 41 | @classmethod 42 | def module(cls): 43 | return "com.esri.udt" 44 | 45 | @classmethod 46 | def scalaUDT(cls): 47 | return "com.esri.udt.PointZUDT" 48 | 49 | def serialize(self, obj): 50 | return obj.x, obj.y, obj.z 51 | 52 | def deserialize(self, datum): 53 | return PointZType(datum[0], datum[1], datum[2]) 54 | 55 | def simpleString(self): 56 | return "pointZ" 57 | 58 | 59 | class PointZType(object): 60 | __UDT__ = PointZUDT() 61 | 62 | def __init__(self, x, y, z): 63 | self.x = x 64 | self.y = y 65 | self.z = z 66 | 67 | def __repr__(self): 68 | return "PointZType({},{},{})".format(self.x, self.y, self.z) 69 | 70 | def __str__(self): 71 | return "({},{},{})".format(self.x, self.y, self.z) 72 | 73 | def __eq__(self, other): 74 | return isinstance(other, self.__class__) and \ 75 | other.x == self.x and \ 76 | other.y == self.y and \ 77 | other.z == self.z 78 | 79 | 80 | class PointMUDT(UserDefinedType): 81 | """ 82 | SQL user-defined type (UDT) for PointM. 83 | """ 84 | 85 | @classmethod 86 | def sqlType(self): 87 | return StructType([ 88 | StructField("x", DoubleType(), False), 89 | StructField("y", DoubleType(), False), 90 | StructField("m", DoubleType(), False) 91 | ]) 92 | 93 | @classmethod 94 | def module(cls): 95 | return "com.esri.udt" 96 | 97 | @classmethod 98 | def scalaUDT(cls): 99 | return "com.esri.udt.PointMUDT" 100 | 101 | def serialize(self, obj): 102 | return obj.x, obj.y, obj.m 103 | 104 | def deserialize(self, datum): 105 | return PointMType(datum[0], datum[1], datum[2]) 106 | 107 | def simpleString(self): 108 | return "pointM" 109 | 110 | 111 | class PointMType(object): 112 | __UDT__ = PointMUDT() 113 | 114 | def __init__(self, x, y, m): 115 | self.x = x 116 | self.y = y 117 | self.m = m 118 | 119 | def __repr__(self): 120 | return "PointMType({},{},{})".format(self.x, self.y, self.m) 121 | 122 | def __str__(self): 123 | return "({},{},{})".format(self.x, self.y, self.m) 124 | 125 | def __eq__(self, other): 126 | return isinstance(other, self.__class__) and \ 127 | other.x == self.x and \ 128 | other.y == self.y and \ 129 | other.m == self.m 130 | 131 | 132 | class PointZMUDT(UserDefinedType): 133 | """ 134 | SQL user-defined type (UDT) for PointZM. 
135 | """ 136 | 137 | @classmethod 138 | def sqlType(self): 139 | return StructType([ 140 | StructField("x", DoubleType(), False), 141 | StructField("y", DoubleType(), False), 142 | StructField("z", DoubleType(), False), 143 | StructField("m", DoubleType(), False) 144 | ]) 145 | 146 | @classmethod 147 | def module(cls): 148 | return "com.esri.udt" 149 | 150 | @classmethod 151 | def scalaUDT(cls): 152 | return "com.esri.udt.PointZMUDT" 153 | 154 | def serialize(self, obj): 155 | return obj.x, obj.y, obj.z, obj.m 156 | 157 | def deserialize(self, datum): 158 | return PointZMType(datum[0], datum[1], datum[2], datum[3]) 159 | 160 | def simpleString(self): 161 | return "pointZM" 162 | 163 | 164 | class PointZMType(object): 165 | __UDT__ = PointZMUDT() 166 | 167 | def __init__(self, x, y, z, m): 168 | self.x = x 169 | self.y = y 170 | self.z = z 171 | self.m = m 172 | 173 | def __repr__(self): 174 | return "PointZMType({},{},{},{})".format(self.x, self.y, self.z, self.m) 175 | 176 | def __str__(self): 177 | return "({},{},{},{})".format(self.x, self.y, self.z, self.m) 178 | 179 | def __eq__(self, other): 180 | return isinstance(other, self.__class__) and \ 181 | other.x == self.x and \ 182 | other.y == self.y and \ 183 | other.z == self.z and \ 184 | other.m == self.m 185 | 186 | 187 | class PointUDT(UserDefinedType): 188 | """ 189 | SQL user-defined type (UDT) for Point. 190 | """ 191 | 192 | @classmethod 193 | def sqlType(self): 194 | return StructType([ 195 | StructField("x", DoubleType(), False), 196 | StructField("y", DoubleType(), False) 197 | ]) 198 | 199 | @classmethod 200 | def module(cls): 201 | return "com.esri.udt" 202 | 203 | @classmethod 204 | def scalaUDT(cls): 205 | return "com.esri.udt.PointUDT" 206 | 207 | def serialize(self, obj): 208 | return obj.x, obj.y 209 | 210 | def deserialize(self, datum): 211 | return PointType(datum[0], datum[1]) 212 | 213 | def simpleString(self): 214 | return "point" 215 | 216 | 217 | class PointType(object): 218 | __UDT__ = PointUDT() 219 | 220 | def __init__(self, x, y): 221 | self.x = x 222 | self.y = y 223 | 224 | def __repr__(self): 225 | return "PointType({},{})".format(self.x, self.y) 226 | 227 | def __str__(self): 228 | return "({},{})".format(self.x, self.y) 229 | 230 | def __eq__(self, other): 231 | return isinstance(other, self.__class__) and \ 232 | other.x == self.x and other.y == self.y 233 | 234 | 235 | class PolylineUDT(UserDefinedType): 236 | """ 237 | SQL user-defined type (UDT) for Polyline. 
238 | """ 239 | 240 | @classmethod 241 | def sqlType(cls): 242 | return StructType([ 243 | StructField("xmin", DoubleType(), False), 244 | StructField("ymin", DoubleType(), False), 245 | StructField("xmax", DoubleType(), False), 246 | StructField("ymax", DoubleType(), False), 247 | StructField("xyNum", ArrayType(IntegerType(), False), False), 248 | StructField("xyArr", ArrayType(DoubleType(), False), False)]) 249 | 250 | @classmethod 251 | def module(cls): 252 | return "com.esri.udt" 253 | 254 | @classmethod 255 | def scalaUDT(cls): 256 | return "com.esri.udt.PolylineUDT" 257 | 258 | def serialize(self, obj): 259 | xyNum = [int(i) for i in obj.xyNum] 260 | xyArr = [float(v) for v in obj.xyArr] 261 | return obj.xmin, obj.ymin, obj.xmax, obj.ymax, xyNum, xyArr 262 | 263 | def deserialize(self, datum): 264 | return PolylineType(datum[0], datum[1], datum[2], datum[3], datum[4], datum[5]) 265 | 266 | def simpleString(self): 267 | return "polyline" 268 | 269 | 270 | class PolylineType(object): 271 | __UDT__ = PolylineUDT() 272 | 273 | def __init__(self, xmin, ymin, xmax, ymax, xyNum, xyArr): 274 | self.xmin = xmin 275 | self.ymin = ymin 276 | self.xmax = xmax 277 | self.ymax = ymax 278 | self.xyNum = xyNum 279 | self.xyArr = xyArr 280 | 281 | def __repr__(self): 282 | return "PolylineType({},{},{},{})".format(self.xmin, self.ymin, self.xmax, self.ymax) 283 | 284 | def __str__(self): 285 | return "({},{},{},{})".format(self.xmin, self.ymin, self.xmax, self.ymax) 286 | 287 | def __eq__(self, other): 288 | return isinstance(other, self.__class__) and \ 289 | other.xmin == self.xmin and other.ymin == self.ymin and \ 290 | other.xmax == self.xmax and other.ymax == self.ymax 291 | 292 | 293 | class PolygonUDT(UserDefinedType): 294 | """ 295 | SQL user-defined type (UDT) for Polygon. 
296 | """ 297 | 298 | @classmethod 299 | def sqlType(cls): 300 | return StructType([ 301 | StructField("xmin", DoubleType(), False), 302 | StructField("ymin", DoubleType(), False), 303 | StructField("xmax", DoubleType(), False), 304 | StructField("ymax", DoubleType(), False), 305 | StructField("xyNum", ArrayType(IntegerType(), False), False), 306 | StructField("xyArr", ArrayType(DoubleType(), False), False)]) 307 | 308 | @classmethod 309 | def module(cls): 310 | return "com.esri.udt" 311 | 312 | @classmethod 313 | def scalaUDT(cls): 314 | return "com.esri.udt.PolygonUDT" 315 | 316 | def serialize(self, obj): 317 | xyNum = [int(i) for i in obj.xyNum] 318 | xyArr = [float(v) for v in obj.xyArr] 319 | return obj.xmin, obj.ymin, obj.xmax, obj.ymax, xyNum, xyArr 320 | 321 | def deserialize(self, datum): 322 | return PolygonType(datum[0], datum[1], datum[2], datum[3], datum[4], datum[5]) 323 | 324 | def simpleString(self): 325 | return "polygon" 326 | 327 | 328 | class PolygonType(object): 329 | __UDT__ = PolygonUDT() 330 | 331 | def __init__(self, xmin, ymin, xmax, ymax, xyNum, xyArr): 332 | self.xmin = xmin 333 | self.ymin = ymin 334 | self.xmax = xmax 335 | self.ymax = ymax 336 | self.xyNum = xyNum 337 | self.xyArr = xyArr 338 | 339 | def __repr__(self): 340 | return "PolygonType({},{},{},{})".format(self.xmin, self.ymin, self.xmax, self.ymax) 341 | 342 | def __str__(self): 343 | return "({},{},{},{})".format(self.xmin, self.ymin, self.xmax, self.ymax) 344 | 345 | def __eq__(self, other): 346 | return isinstance(other, self.__class__) and \ 347 | other.xmin == self.xmin and other.ymin == self.ymin and \ 348 | other.xmax == self.xmax and other.ymax == self.ymax 349 | -------------------------------------------------------------------------------- /src/main/python/udtapp.py: -------------------------------------------------------------------------------- 1 | from pyspark import SparkConf, SparkContext 2 | from pyspark.sql import SQLContext 3 | from pyspark.sql.types import DoubleType 4 | 5 | from com.esri.udt import PointType, PointUDT 6 | 7 | if __name__ == "__main__": 8 | 9 | conf = SparkConf().setAppName("GDB App") 10 | sc = SparkContext(conf=conf) 11 | try: 12 | sqlContext = SQLContext(sc) 13 | 14 | sqlContext.registerFunction("getX", lambda p: p.x, DoubleType()) 15 | sqlContext.registerFunction("getY", lambda p: p.y, DoubleType()) 16 | sqlContext.registerFunction("plus2", lambda p: PointType(p.x + 2, p.y + 2), PointUDT()) 17 | 18 | points = "Points" 19 | df = sqlContext.read \ 20 | .format("com.esri.gdb") \ 21 | .options(path="../../test/resources/Test.gdb", name=points, numPartitions="1") \ 22 | .load() 23 | df.printSchema() 24 | df.registerTempTable(points) 25 | rows = sqlContext.sql("select plus2(Shape),X,Y from {}".format(points)) 26 | for row in rows.collect(): 27 | print row 28 | 29 | points = "MPoints" 30 | df = sqlContext.read \ 31 | .format("com.esri.gdb") \ 32 | .options(path="../../test/resources/Test.gdb", name=points, numPartitions="1") \ 33 | .load() 34 | df.printSchema() 35 | df.registerTempTable(points) 36 | rows = sqlContext.sql("select * from {}".format(points)) 37 | for row in rows.collect(): 38 | print row 39 | 40 | points = "ZPoints" 41 | df = sqlContext.read \ 42 | .format("com.esri.gdb") \ 43 | .options(path="../../test/resources/Test.gdb", name=points, numPartitions="1") \ 44 | .load() 45 | df.printSchema() 46 | df.registerTempTable(points) 47 | rows = sqlContext.sql("select * from {}".format(points)) 48 | for row in rows.collect(): 49 | print row 50 | 51 | points = 
"ZMPoints" 52 | df = sqlContext.read \ 53 | .format("com.esri.gdb") \ 54 | .options(path="../../test/resources/Test.gdb", name=points, numPartitions="1") \ 55 | .load() 56 | df.printSchema() 57 | df.registerTempTable(points) 58 | rows = sqlContext.sql("select * from {}".format(points)) 59 | for row in rows.collect(): 60 | print row 61 | 62 | lines = "Lines" 63 | df = sqlContext.read \ 64 | .format("com.esri.gdb") \ 65 | .options(path="../../test/resources/Test.gdb", name=lines, numPartitions="2") \ 66 | .load() 67 | df.printSchema() 68 | df.registerTempTable(lines) 69 | rows = sqlContext.sql("select * from {}".format(lines)) 70 | for row in rows.collect(): 71 | print row 72 | 73 | polygons = "Polygons" 74 | df = sqlContext.read \ 75 | .format("com.esri.gdb") \ 76 | .options(path="../../test/resources/Test.gdb", name=polygons, numPartitions="2") \ 77 | .load() 78 | df.printSchema() 79 | df.registerTempTable(polygons) 80 | rows = sqlContext.sql("select * from {}".format(polygons)) 81 | for row in rows.collect(): 82 | print row 83 | 84 | # sqlContext \ 85 | # .sql("select * from {}".format(gdb_name)) \ 86 | # .write \ 87 | # .format("json") \ 88 | # .save("/tmp/{}.json".format(gdb_name)) 89 | 90 | finally: 91 | sc.stop() 92 | -------------------------------------------------------------------------------- /src/main/python/udtapp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | spark-submit\ 3 | --master "local[*]"\ 4 | --driver-memory 1G\ 5 | --executor-memory 8G\ 6 | --jars ../../../target/spark-gdb-0.4.jar\ 7 | udtapp.py 8 | -------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=INFO, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.out 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss} %p %c{1}: %m%n 7 | 8 | # Settings to quiet third party logs that are too verbose 9 | log4j.logger.org.eclipse.jetty=INFO 10 | log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR 11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO 12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO 13 | -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} %-5level - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/CatRow.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | /** 4 | * Catalog Row 5 | */ 6 | case class CatRow(id: Int, name: String) { 7 | val hexName = "a%08x".format(id) 8 | } 9 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/DataBuffer.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.{ByteBuffer, ByteOrder} 4 | 5 | import org.apache.hadoop.fs.FSDataInputStream 6 | 7 | /** 8 | */ 9 | class DataBuffer(dataInput: FSDataInputStream) extends Serializable 
{ 10 | 11 | private var bytes = new Array[Byte](1024) 12 | private var byteBuffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN) 13 | 14 | def readBytes(length: Int) = { 15 | if (length > bytes.length) { 16 | bytes = new Array[Byte](length) 17 | byteBuffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN) 18 | } 19 | else { 20 | byteBuffer.clear 21 | } 22 | dataInput.readFully(bytes, 0, length) 23 | byteBuffer 24 | } 25 | 26 | def seek(position: Long) = { 27 | dataInput.seek(position) 28 | this 29 | } 30 | 31 | def close() { 32 | dataInput.close() 33 | } 34 | } 35 | 36 | object DataBuffer { 37 | def apply(dataInput: FSDataInputStream) = { 38 | new DataBuffer(dataInput) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/DefaultSource.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import org.apache.spark.sql.SQLContext 4 | import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, SchemaRelationProvider} 5 | import org.apache.spark.sql.types.StructType 6 | 7 | /** 8 | * Provides access to FileGDB data from pure SQL statements. 9 | */ 10 | class DefaultSource extends RelationProvider with SchemaRelationProvider { 11 | /** 12 | * Creates a new relation for data store in FileGDB given parameters. 13 | * Parameters must include 'path' and 'name'. 14 | */ 15 | override def createRelation(sqlContext: SQLContext, 16 | parameters: Map[String, String] 17 | ): BaseRelation = { 18 | createRelation(sqlContext, parameters, null) 19 | } 20 | 21 | /** 22 | * Creates a new relation for data store in FileGDB given parameters and user supported schema. 23 | * Parameters must include 'path' and 'name'. 
24 | */ 25 | override def createRelation(sqlContext: SQLContext, 26 | parameters: Map[String, String], 27 | schema: StructType 28 | ): BaseRelation = { 29 | val path = parameters.getOrElse("path", sys.error("Parameter 'path' must be defined.")) 30 | val name = parameters.getOrElse("name", sys.error("Parameter 'name' must be defined.")) 31 | val numPartitions = parameters.getOrElse("numPartitions", "8").toInt 32 | GDBRelation(path, name, numPartitions)(sqlContext) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/EsriFieldType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | /** 4 | */ 5 | private object EsriFieldType { 6 | val INT16 = 0 7 | val INT32 = 1 8 | val FLOAT32 = 2 9 | val FLOAT64 = 3 10 | val STRING = 4 11 | val DATETIME = 5 12 | val OID = 6 13 | val SHAPE = 7 14 | val BINARY = 8 15 | val RASTER = 9 16 | val UUID = 10 17 | val GUID = 11 18 | val XML = 12 19 | } 20 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/Field.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{DataType, Metadata, StructField} 6 | 7 | /** 8 | */ 9 | abstract class Field(name: String, 10 | dataType: DataType, 11 | nullValueAllowed: Boolean, 12 | metadata: Metadata = Metadata.empty 13 | ) extends StructField(name, dataType, nullValueAllowed, metadata) { 14 | 15 | def readValue(byteBuffer: ByteBuffer, oid: Int): Any 16 | 17 | def readTuple(byteBuffer: ByteBuffer, oid: Int) = { 18 | name -> readValue(byteBuffer, oid) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldBinary.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{Metadata, BinaryType} 6 | 7 | /** 8 | */ 9 | class FieldBinary(name: String, nullValueAllowed: Boolean, metadata:Metadata) 10 | extends FieldBytes(name, BinaryType, nullValueAllowed, metadata) { 11 | 12 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 13 | getByteBuffer(byteBuffer) 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldBytes.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{DataType, Metadata} 6 | 7 | /** 8 | */ 9 | abstract class FieldBytes(name: String, 10 | dataType: DataType, 11 | nullValueAllowed: Boolean, 12 | metadata: Metadata = Metadata.empty 13 | ) 14 | extends Field(name, dataType, nullValueAllowed, metadata) { 15 | 16 | protected var m_bytes = new Array[Byte](1024) 17 | 18 | def getByteBuffer(byteBuffer: ByteBuffer) = { 19 | val numBytes = fillVarBytes(byteBuffer) 20 | ByteBuffer.wrap(m_bytes, 0, numBytes) 21 | } 22 | 23 | def fillVarBytes(byteBuffer: ByteBuffer) = { 24 | val numBytes = byteBuffer.getVarUInt.toInt 25 | if (numBytes > m_bytes.length) { 26 | m_bytes = new Array[Byte](numBytes) 27 | } 28 | 0 until numBytes foreach { 29 | m_bytes(_) = byteBuffer.get 30 | } 31 | numBytes 32 | } 33 | } 34 | 
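Note: the `getVarUInt` and `getVarInt` calls used in `FieldBytes` and the other field readers are implicit `ByteBuffer` extensions defined in `package.scala` (listed in the tree above but not reproduced in this excerpt). As a point of reference only — not the project's actual implementation — here is a minimal sketch of the base-128 unsigned varint decoding (7 payload bits per byte, least-significant group first, high bit as a continuation flag) that the reverse-engineered FGDB spec describes; the name `decodeVarUInt` is purely illustrative:

```scala
import java.nio.ByteBuffer

// Illustrative sketch of unsigned varint decoding (assumed encoding per the FGDB spec):
// each byte contributes its low 7 bits, least-significant group first; a set high bit
// means another byte follows. The real getVarUInt in package.scala may differ.
def decodeVarUInt(byteBuffer: ByteBuffer): Long = {
  var result = 0L
  var shift = 0
  var more = true
  while (more) {
    val b = byteBuffer.get & 0xFF      // next byte as an unsigned value
    result |= (b & 0x7FL) << shift     // accumulate the 7 payload bits
    more = (b & 0x80) != 0             // continuation flag
    shift += 7
  }
  result
}
```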
-------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldDateTime.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | import java.sql.Timestamp 5 | 6 | import org.apache.spark.sql.types.{Metadata, TimestampType} 7 | 8 | /** 9 | */ 10 | class FieldDateTime(name: String, nullValueAllowed: Boolean, metadata:Metadata) 11 | extends Field(name, TimestampType, nullValueAllowed, metadata) { 12 | 13 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 14 | val numDays = byteBuffer.getDouble 15 | // convert days since 12/30/1899 to 1/1/1970 16 | val unixDays = numDays - 25569 17 | val millis = (unixDays * 1000 * 60 * 60 * 24).ceil.toLong 18 | new Timestamp(millis) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldFloat32.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{FloatType, Metadata} 6 | 7 | /** 8 | */ 9 | class FieldFloat32(name: String, nullValueAllowed: Boolean, metadata: Metadata) 10 | extends Field(name, FloatType, nullValueAllowed, metadata) { 11 | 12 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 13 | byteBuffer.getFloat 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldFloat64.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{DoubleType, Metadata} 6 | 7 | /** 8 | */ 9 | class FieldFloat64(name: String, nullValueAllowed: Boolean, metadata: Metadata) 10 | extends Field(name, DoubleType, nullValueAllowed, metadata) { 11 | 12 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 13 | byteBuffer.getDouble 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldGeom.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import org.apache.spark.sql.types.{DataType, Metadata} 4 | 5 | /** 6 | */ 7 | @deprecated("use FieldBytes instead", "0.4") 8 | abstract class FieldGeom(name: String, 9 | dataType: DataType, 10 | nullValueAllowed: Boolean, 11 | xorig: Double, 12 | yorig: Double, 13 | xyscale: Double, 14 | metadata: Metadata) 15 | extends FieldBytes(name, dataType, nullValueAllowed, metadata) -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldGeomNoop.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{BinaryType, Metadata} 6 | 7 | /** 8 | */ 9 | class FieldGeomNoop(name: String, nullValueAllowed: Boolean) 10 | extends FieldBytes(name, BinaryType, nullValueAllowed, Metadata.empty) { 11 | 12 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 13 | throw new RuntimeException("Should not have a NOOP geometry !") 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldInt16.scala: 
-------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{Metadata, ShortType} 6 | 7 | /** 8 | */ 9 | class FieldInt16(name: String, nullValueAllowed: Boolean, metadata: Metadata) 10 | extends Field(name, ShortType, nullValueAllowed, metadata) { 11 | 12 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 13 | byteBuffer.getShort 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldInt32.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{IntegerType, Metadata} 6 | 7 | /** 8 | */ 9 | class FieldInt32(name: String, nullValueAllowed: Boolean, metadata: Metadata) 10 | extends Field(name, IntegerType, nullValueAllowed, metadata) { 11 | 12 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 13 | byteBuffer.getInt 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldOID.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{IntegerType, Metadata} 6 | 7 | /** 8 | */ 9 | class FieldOID(name: String, nullValueAllowed: Boolean, metadata: Metadata) 10 | extends Field(name, IntegerType, nullValueAllowed, metadata) { 11 | 12 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 13 | oid 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPointMType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import com.esri.udt.{PointMType, PointMUDT} 6 | import org.apache.spark.sql.types.Metadata 7 | 8 | /** 9 | */ 10 | object FieldPointMType extends Serializable { 11 | def apply(name: String, 12 | nullValueAllowed: Boolean, 13 | xOrig: Double, 14 | yOrig: Double, 15 | mOrig: Double, 16 | xyScale: Double, 17 | mScale: Double, 18 | metadata: Metadata 19 | ) = { 20 | new FieldPointMType(name, nullValueAllowed, xOrig, yOrig, mOrig, xyScale, mScale, metadata) 21 | } 22 | } 23 | 24 | class FieldPointMType(name: String, 25 | nullValueAllowed: Boolean, 26 | xOrig: Double, 27 | yOrig: Double, 28 | mOrig: Double, 29 | xyScale: Double, 30 | mScale: Double, 31 | metadata: Metadata) 32 | extends FieldBytes(name, new PointMUDT(), nullValueAllowed, metadata) { 33 | 34 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 35 | val blob = getByteBuffer(byteBuffer) 36 | 37 | val geomType = blob.getVarUInt() 38 | 39 | val vx = blob.getVarUInt 40 | val vy = blob.getVarUInt 41 | val vm = blob.getVarUInt 42 | 43 | val x = (vx - 1.0) / xyScale + xOrig 44 | val y = (vy - 1.0) / xyScale + yOrig 45 | val m = (vm - 1.0) / mScale + mOrig 46 | 47 | new PointMType(x, y, m) 48 | } 49 | } -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPointType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import com.esri.udt.{PointType, PointUDT} 6 | import org.apache.spark.sql.types.Metadata 7 | 8 | object 
FieldPointType extends Serializable { 9 | def apply(name: String, 10 | nullValueAllowed: Boolean, 11 | xOrig: Double, 12 | yOrig: Double, 13 | xyScale: Double, 14 | metadata: Metadata) = { 15 | new FieldPointType(name, nullValueAllowed, xOrig, yOrig, xyScale, metadata) 16 | } 17 | } 18 | 19 | class FieldPointType(name: String, 20 | nullValueAllowed: Boolean, 21 | xOrig: Double, 22 | yOrig: Double, 23 | xyScale: Double, 24 | metadata: Metadata) 25 | extends FieldBytes(name, new PointUDT(), nullValueAllowed, metadata) { 26 | 27 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 28 | val blob = getByteBuffer(byteBuffer) 29 | 30 | blob.getVarUInt() // geomType 31 | 32 | val vx = blob.getVarUInt() 33 | val vy = blob.getVarUInt() 34 | val x = (vx - 1.0) / xyScale + xOrig 35 | val y = (vy - 1.0) / xyScale + yOrig 36 | 37 | new PointType(x, y) 38 | } 39 | } -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPointZMType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import com.esri.udt.{PointZMType, PointZMUDT} 6 | import org.apache.spark.sql.types.Metadata 7 | 8 | /** 9 | */ 10 | object FieldPointZMType extends Serializable { 11 | def apply(name: String, 12 | nullValueAllowed: Boolean, 13 | xOrig: Double, 14 | yOrig: Double, 15 | zOrig: Double, 16 | mOrig: Double, 17 | xyScale: Double, 18 | zScale: Double, 19 | mScale: Double, 20 | metadata: Metadata 21 | ) = { 22 | new FieldPointZMType(name, nullValueAllowed, xOrig, yOrig, zOrig, mOrig, xyScale, zScale, mScale, metadata) 23 | } 24 | } 25 | 26 | class FieldPointZMType(name: String, 27 | nullValueAllowed: Boolean, 28 | xOrig: Double, 29 | yOrig: Double, 30 | zOrig: Double, 31 | mOrig: Double, 32 | xyScale: Double, 33 | zScale: Double, 34 | mScale: Double, 35 | metadata: Metadata) 36 | extends FieldBytes(name, new PointZMUDT(), nullValueAllowed, metadata) { 37 | 38 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 39 | val blob = getByteBuffer(byteBuffer) 40 | 41 | val geomType = blob.getVarUInt 42 | 43 | val vx = blob.getVarUInt() 44 | val vy = blob.getVarUInt() 45 | val x = (vx - 1.0) / xyScale + xOrig 46 | val y = (vy - 1.0) / xyScale + yOrig 47 | 48 | geomType match { 49 | // Point 50 | case 1 => new PointZMType(x, y) 51 | // PointZ 52 | case 9 => 53 | val vz = blob.getVarUInt 54 | val z = (vz - 1.0) / zScale + zOrig 55 | new PointZMType(x, y, z) 56 | // PointM 57 | case 21 => 58 | val vm = blob.getVarUInt 59 | val m = (vm - 1.0) / mScale + mOrig 60 | new PointZMType(x, y, 0.0, m) 61 | // PointZM 62 | case _ => 63 | val vz = blob.getVarUInt 64 | val vm = blob.getVarUInt 65 | val z = (vz - 1.0) / zScale + zOrig 66 | val m = (vm - 1.0) / mScale + mOrig 67 | new PointZMType(x, y, z, m) 68 | } 69 | } 70 | } -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPointZType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import com.esri.udt.{PointZType, PointZUDT} 6 | import org.apache.spark.sql.types.Metadata 7 | 8 | /** 9 | */ 10 | object FieldPointZType extends Serializable { 11 | def apply(name: String, 12 | nullValueAllowed: Boolean, 13 | xOrig: Double, 14 | yOrig: Double, 15 | zOrig: Double, 16 | xyScale: Double, 17 | zScale: Double, 18 | metadata: Metadata 19 | ) = { 20 | new 
FieldPointZType(name, nullValueAllowed, xOrig, yOrig, zOrig, xyScale, zScale, metadata) 21 | } 22 | } 23 | 24 | class FieldPointZType(name: String, 25 | nullValueAllowed: Boolean, 26 | xOrig: Double, 27 | yOrig: Double, 28 | zOrig: Double, 29 | xyScale: Double, 30 | zScale: Double, 31 | metadata: Metadata) 32 | extends FieldBytes(name, new PointZUDT(), nullValueAllowed, metadata) { 33 | 34 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 35 | val blob = getByteBuffer(byteBuffer) 36 | 37 | val geomType = blob.getVarUInt 38 | 39 | val vx = blob.getVarUInt 40 | val vy = blob.getVarUInt 41 | val vz = blob.getVarUInt 42 | 43 | val x = (vx - 1.0) / xyScale + xOrig 44 | val y = (vy - 1.0) / xyScale + yOrig 45 | val z = (vz - 1.0) / zScale + zOrig 46 | 47 | new PointZType(x, y, z) 48 | } 49 | } -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPoly.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import com.esri.core.geometry.MultiPath 6 | import org.apache.spark.sql.types.{DataType, Metadata} 7 | 8 | @deprecated("not used", "0.4") 9 | abstract class FieldPoly(name: String, 10 | dataType: DataType, 11 | nullValueAllowed: Boolean, 12 | xOrig: Double, 13 | yOrig: Double, 14 | xyScale: Double, 15 | metadata: Metadata) 16 | extends FieldBytes(name, dataType, nullValueAllowed, metadata) { 17 | 18 | protected var dx = 0L 19 | protected var dy = 0L 20 | 21 | def addPath(byteBuffer: ByteBuffer, numCoordinates: Int, path: MultiPath) = { 22 | 0 until numCoordinates foreach (n => { 23 | dx += byteBuffer.getVarInt 24 | dy += byteBuffer.getVarInt 25 | val x = dx / xyScale + xOrig 26 | val y = dy / xyScale + yOrig 27 | n match { 28 | case 0 => path.startPath(x, y) 29 | case _ => path.lineTo(x, y) 30 | } 31 | }) 32 | path 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPoly2Type.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{DataType, Metadata} 6 | 7 | abstract class FieldPoly2Type[T](name: String, 8 | dataType: DataType, 9 | nullValueAllowed: Boolean, 10 | xOrig: Double, 11 | yOrig: Double, 12 | xyScale: Double, 13 | metadata: Metadata) 14 | extends FieldBytes(name, dataType, nullValueAllowed, metadata) { 15 | 16 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 17 | val blob = getByteBuffer(byteBuffer) 18 | 19 | val geomType = blob.getVarUInt 20 | val numPoints = blob.getVarUInt.toInt 21 | if (numPoints == 0) 22 | createPolyType(0, 0, 0, 0, Array.empty[Int], Array.empty[Double]) 23 | else { 24 | val numParts = blob.getVarUInt.toInt 25 | 26 | val xmin = blob.getVarUInt / xyScale + xOrig 27 | val ymin = blob.getVarUInt / xyScale + yOrig 28 | val xmax = blob.getVarUInt / xyScale + xmin 29 | val ymax = blob.getVarUInt / xyScale + ymin 30 | 31 | var dx = 0L 32 | var dy = 0L 33 | 34 | val xyNum = new Array[Int](numParts) 35 | val xyArr = new Array[Double](numPoints * 2) 36 | 37 | if (numParts > 1) { 38 | var i = 0 39 | var sum = 0 40 | 1 to numParts foreach (partIndex => { 41 | if (partIndex == numParts) { 42 | xyNum(i) = numPoints - sum 43 | } else { 44 | val numXY = blob.getVarUInt.toInt 45 | xyNum(i) = numXY 46 | sum += numXY 47 | i += 1 48 | } 49 | }) 50 | i = 0 51 | xyNum.foreach(numXY => { 
52 | 0 until numXY foreach (n => { 53 | dx += blob.getVarInt 54 | dy += blob.getVarInt 55 | val x = dx / xyScale + xOrig 56 | val y = dy / xyScale + yOrig 57 | xyArr(i) = x 58 | i += 1 59 | xyArr(i) = y 60 | i += 1 61 | }) 62 | }) 63 | } 64 | else { 65 | xyNum(0) = numPoints 66 | var i = 0 67 | 0 until numPoints foreach (n => { 68 | dx += blob.getVarInt 69 | dy += blob.getVarInt 70 | val x = dx / xyScale + xOrig 71 | val y = dy / xyScale + yOrig 72 | xyArr(i) = x 73 | i += 1 74 | xyArr(i) = y 75 | i += 1 76 | }) 77 | } 78 | createPolyType(xmin, ymin, xmax, ymax, xyNum, xyArr) 79 | } 80 | } 81 | 82 | def createPolyType(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]): T 83 | } 84 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPoly3Type.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{DataType, Metadata} 6 | 7 | abstract class FieldPoly3Type[T](name: String, 8 | dataType: DataType, 9 | nullValueAllowed: Boolean, 10 | xOrig: Double, 11 | yOrig: Double, 12 | nOrig: Double, 13 | xyScale: Double, 14 | nScale: Double, 15 | metadata: Metadata) 16 | extends FieldBytes(name, dataType, nullValueAllowed, metadata) { 17 | 18 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 19 | val blob = getByteBuffer(byteBuffer) 20 | 21 | val geomType = blob.getVarUInt 22 | val numPoints = blob.getVarUInt.toInt 23 | // TODO - Handle zero num points in other geom type. 24 | if (numPoints == 0) { 25 | createPolyMType(0, 0, 0, 0, Array.empty[Int], Array.empty[Double]) 26 | } 27 | else { 28 | val numParts = blob.getVarUInt.toInt 29 | 30 | val xmin = blob.getVarUInt / xyScale + xOrig 31 | val ymin = blob.getVarUInt / xyScale + yOrig 32 | val xmax = blob.getVarUInt / xyScale + xmin 33 | val ymax = blob.getVarUInt / xyScale + ymin 34 | 35 | var dx = 0L 36 | var dy = 0L 37 | 38 | val xyNum = new Array[Int](numParts) 39 | val xyArr = new Array[Double](numPoints * 3) 40 | 41 | var i = 0 42 | if (numParts > 1) { 43 | var sum = 0 44 | 1 to numParts foreach (partIndex => { 45 | if (partIndex == numParts) { 46 | xyNum(i) = numPoints - sum 47 | } else { 48 | val numXY = blob.getVarUInt.toInt 49 | xyNum(i) = numXY 50 | sum += numXY 51 | i += 1 52 | } 53 | }) 54 | i = 0 55 | xyNum.foreach(numXY => { 56 | 0 until numXY foreach (_ => { 57 | dx += blob.getVarInt 58 | dy += blob.getVarInt 59 | val x = dx / xyScale + xOrig 60 | val y = dy / xyScale + yOrig 61 | xyArr(i) = x 62 | i += 1 63 | xyArr(i) = y 64 | i += 2 65 | }) 66 | }) 67 | } 68 | else { 69 | xyNum(0) = numPoints 70 | 0 until numPoints foreach (_ => { 71 | dx += blob.getVarInt 72 | dy += blob.getVarInt 73 | xyArr(i) = dx / xyScale + xOrig 74 | i += 1 75 | xyArr(i) = dy / xyScale + yOrig 76 | i += 2 77 | }) 78 | } 79 | i = 2 80 | var dn = 0L 81 | 0 until numPoints foreach (_ => { 82 | dn += blob.getVarInt 83 | xyArr(i) = dn / nScale + nOrig 84 | i += 3 85 | }) 86 | createPolyMType(xmin, ymin, xmax, ymax, xyNum, xyArr) 87 | } 88 | } 89 | 90 | def createPolyMType(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]): T 91 | } 92 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPolygon.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 
| import java.nio.ByteBuffer 4 | 5 | import com.esri.core.geometry.Polygon 6 | import com.esri.udt.PolygonUDT 7 | import org.apache.spark.sql.types.{DataType, Metadata} 8 | 9 | @deprecated("not used", "0.4") 10 | object FieldPolygon { 11 | def apply(name: String, 12 | nullValueAllowed: Boolean, 13 | xOrig: Double, 14 | yOrig: Double, 15 | xyScale: Double, 16 | metadata: Metadata) = { 17 | new FieldPolygonEsri(name, nullValueAllowed, xOrig, yOrig, xyScale, metadata) 18 | } 19 | } 20 | 21 | @deprecated("not used", "0.4") 22 | abstract class FieldPolygon(name: String, 23 | dataType: DataType, 24 | nullValueAllowed: Boolean, 25 | xOrig: Double, 26 | yOrig: Double, 27 | xyScale: Double, 28 | metadata: Metadata 29 | ) 30 | extends FieldPoly(name, dataType, nullValueAllowed, xOrig, yOrig, xyScale, metadata) { 31 | 32 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 33 | val polygon = new Polygon() 34 | 35 | val blob = getByteBuffer(byteBuffer) 36 | 37 | val geomType = blob.getVarUInt 38 | 39 | val numPoints = blob.getVarUInt.toInt 40 | val numParts = blob.getVarUInt.toInt 41 | 42 | val xmin = blob.getVarUInt / xyScale + xOrig 43 | val ymin = blob.getVarUInt / xyScale + yOrig 44 | val xmax = blob.getVarUInt / xyScale + xmin 45 | val ymax = blob.getVarUInt / xyScale + ymin 46 | 47 | dx = 0L 48 | dy = 0L 49 | 50 | if (numParts > 1) { 51 | var sum = 0 52 | val numCoordSeq = 1 to numParts map (part => { 53 | val numCoord = if (part == numParts) { 54 | numPoints - sum 55 | } else { 56 | blob.getVarUInt.toInt 57 | } 58 | sum += numCoord 59 | numCoord 60 | }) 61 | // TODO - fix shells and holes based on https://github.com/rouault/dump_gdbtable/wiki/FGDB-Spec 62 | numCoordSeq.foreach(numCoord => addPath(blob, numCoord, polygon)) 63 | } 64 | else { 65 | addPath(blob, numPoints, polygon) 66 | } 67 | polygon 68 | } 69 | } 70 | 71 | @deprecated("not used", "0.4") 72 | class FieldPolygonEsri(name: String, 73 | nullValueAllowed: Boolean, 74 | xOrig: Double, 75 | yOrig: Double, 76 | xyScale: Double, 77 | metadata: Metadata) 78 | extends FieldPolygon(name, new PolygonUDT(), nullValueAllowed, xOrig, yOrig, xyScale, metadata) 79 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPolygonType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import com.esri.udt.{PolygonType, PolygonUDT} 4 | import org.apache.spark.sql.types.Metadata 5 | 6 | /** 7 | */ 8 | object FieldPolygonType extends Serializable { 9 | def apply(name: String, 10 | nullValueAllowed: Boolean, 11 | xOrig: Double, 12 | yOrig: Double, 13 | xyScale: Double, 14 | metadata: Metadata) = { 15 | new FieldPolygonType(name, nullValueAllowed, xOrig, yOrig, xyScale, metadata) 16 | } 17 | } 18 | 19 | class FieldPolygonType(name: String, 20 | nullValueAllowed: Boolean, 21 | xOrig: Double, 22 | yOrig: Double, 23 | xyScale: Double, 24 | metadata: Metadata) 25 | extends FieldPoly2Type[PolygonType](name, new PolygonUDT(), nullValueAllowed, xOrig, yOrig, xyScale, metadata) { 26 | 27 | override def createPolyType(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]): PolygonType = { 28 | PolygonType(xmin, ymin, xmax, ymax, xyNum, xyArr) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPolyline.scala: -------------------------------------------------------------------------------- 1 | package 
com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import com.esri.core.geometry.Polyline 6 | import com.esri.udt.PolylineUDT 7 | import org.apache.spark.sql.types.Metadata 8 | 9 | @deprecated("not used", "0.4") 10 | object FieldPolyline extends Serializable { 11 | def apply(name: String, 12 | nullValueAllowed: Boolean, 13 | xOrig: Double, 14 | yOrig: Double, 15 | xyScale: Double, 16 | metadata: Metadata) = { 17 | new FieldPolyline(name, nullValueAllowed, xOrig, yOrig, xyScale, metadata) 18 | } 19 | } 20 | 21 | @deprecated("not used", "0.4") 22 | class FieldPolyline(name: String, 23 | nullValueAllowed: Boolean, 24 | xOrig: Double, 25 | yOrig: Double, 26 | xyScale: Double, 27 | metadata: Metadata 28 | ) 29 | extends FieldPoly(name, new PolylineUDT(), nullValueAllowed, xOrig, yOrig, xyScale, metadata) { 30 | 31 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 32 | val polyline = new Polyline() 33 | 34 | val blob = getByteBuffer(byteBuffer) 35 | val geomType = blob.getVarUInt 36 | 37 | val numPoints = blob.getVarUInt.toInt 38 | val numParts = blob.getVarUInt.toInt 39 | 40 | val xmin = blob.getVarUInt / xyScale + xOrig 41 | val ymin = blob.getVarUInt / xyScale + yOrig 42 | val xmax = blob.getVarUInt / xyScale + xmin 43 | val ymax = blob.getVarUInt / xyScale + ymin 44 | 45 | dx = 0L 46 | dy = 0L 47 | 48 | if (numParts > 1) { 49 | var sum = 0 50 | val numCoordSeq = 1 to numParts map (part => { 51 | val numCoord = if (part == numParts) { 52 | numPoints - sum 53 | } else { 54 | blob.getVarUInt.toInt 55 | } 56 | sum += numCoord 57 | numCoord 58 | }) 59 | numCoordSeq.foreach(numCoord => addPath(blob, numCoord, polyline)) 60 | } 61 | else { 62 | addPath(blob, numPoints, polyline) 63 | } 64 | polyline 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPolylineMType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import com.esri.udt.{PolylineMType, PolylineMUDT} 4 | import org.apache.spark.sql.types.Metadata 5 | 6 | /** 7 | */ 8 | object FieldPolylineMType extends Serializable { 9 | def apply(name: String, 10 | nullValueAllowed: Boolean, 11 | xOrig: Double, 12 | yOrig: Double, 13 | mOrig: Double, 14 | xyScale: Double, 15 | mScale: Double, 16 | metadata: Metadata) = { 17 | new FieldPolylineMType(name, nullValueAllowed, xOrig, yOrig, mOrig, xyScale, mScale, metadata) 18 | } 19 | } 20 | 21 | class FieldPolylineMType(name: String, 22 | nullValueAllowed: Boolean, 23 | xOrig: Double, 24 | yOrig: Double, 25 | mOrig: Double, 26 | xyScale: Double, 27 | mScale: Double, 28 | metadata: Metadata) 29 | extends FieldPoly3Type[PolylineMType](name, new PolylineMUDT(), nullValueAllowed, xOrig, yOrig, mOrig, xyScale, mScale, metadata) { 30 | 31 | override def createPolyMType(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]) = { 32 | PolylineMType(xmin, ymin, xmax, ymax, xyNum, xyArr) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldPolylineType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import com.esri.udt.{PolylineType, PolylineUDT} 4 | import org.apache.spark.sql.types.Metadata 5 | 6 | /** 7 | */ 8 | object FieldPolylineType extends Serializable { 9 | def apply(name: String, 10 | nullValueAllowed: Boolean, 11 | xOrig: Double, 12 | yOrig: 
Double, 13 | xyScale: Double, 14 | metadata: Metadata) = { 15 | new FieldPolylineType(name, nullValueAllowed, xOrig, yOrig, xyScale, metadata) 16 | } 17 | } 18 | 19 | class FieldPolylineType(name: String, 20 | nullValueAllowed: Boolean, 21 | xOrig: Double, 22 | yOrig: Double, 23 | xyScale: Double, 24 | metadata: Metadata) 25 | extends FieldPoly2Type[PolylineType](name, new PolylineUDT(), nullValueAllowed, xOrig, yOrig, xyScale, metadata) { 26 | 27 | override def createPolyType(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]): PolylineType = { 28 | PolylineType(xmin, ymin, xmax, ymax, xyNum, xyArr) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldString.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{Metadata, StringType} 6 | 7 | /** 8 | */ 9 | class FieldString(name: String, nullValueAllowed: Boolean, metadata: Metadata) 10 | extends FieldBytes(name, StringType, nullValueAllowed, metadata) { 11 | 12 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 13 | val numBytes = fillVarBytes(byteBuffer) 14 | new String(m_bytes, 0, numBytes) // TODO - define Charset, like UTF-8 ? 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/FieldUUID.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.sql.types.{Metadata, StringType} 6 | 7 | /** 8 | */ 9 | class FieldUUID(name: String, nullValueAllowed: Boolean, metadata: Metadata) 10 | extends Field(name, StringType, nullValueAllowed, metadata) { 11 | 12 | private val b = new Array[Byte](16) 13 | 14 | override def readValue(byteBuffer: ByteBuffer, oid: Int) = { 15 | 16 | 0 until 16 foreach (b(_) = byteBuffer.get) 17 | 18 | "{%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X}".format( 19 | b(3), b(2), b(1), b(0), 20 | b(5), b(4), b(7), b(6), 21 | b(8), b(9), b(10), b(11), 22 | b(12), b(13), b(14), b(15)) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/GDBIndex.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import java.io.{DataInput, File} 4 | import java.nio.{ByteBuffer, ByteOrder} 5 | 6 | import org.apache.hadoop.conf.Configuration 7 | import org.apache.hadoop.fs.{FSDataInputStream, Path} 8 | import org.apache.spark.Logging 9 | 10 | object GDBIndex { 11 | def apply(path: String, name: String, conf: Configuration = new Configuration()) = { 12 | val filename = StringBuilder.newBuilder.append(path).append(File.separator).append(name).append(".gdbtablx").toString() 13 | val hdfsPath = new Path(filename) 14 | val dataInput = hdfsPath.getFileSystem(conf).open(hdfsPath) 15 | 16 | val bytes = new Array[Byte](16) 17 | dataInput.readFully(bytes) 18 | val byteBuffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN) 19 | 20 | val signature = byteBuffer.getInt 21 | val n1024Blocks = byteBuffer.getInt 22 | val numRows = byteBuffer.getInt 23 | val indexSize = byteBuffer.getInt 24 | 25 | new GDBIndex(dataInput, numRows, indexSize) 26 | } 27 | } 28 | 29 | private[gdb] class GDBIndex(dataInput: 
FSDataInputStream, 30 | val numRows: Int, 31 | indexSize: Int 32 | ) extends Logging with AutoCloseable with Serializable { 33 | 34 | def readSeekForRowNum(rowNum: Int) = { 35 | val bytes = new Array[Byte](indexSize) 36 | dataInput.seek(16 + rowNum * indexSize) 37 | dataInput.readFully(bytes) 38 | ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).getInt 39 | } 40 | 41 | def iterator(startAtRow: Int = 0, numRowsToRead: Int = -1) = { 42 | dataInput.seek(16 + startAtRow * indexSize) 43 | val maxRows = if (numRowsToRead == -1) numRows else numRowsToRead 44 | // log.info(s"iterator::startAtRow=$startAtRow maxRows=$maxRows") 45 | new GDBIndexIterator(dataInput, startAtRow, maxRows, indexSize).withFilter(_.isSeekable) 46 | } 47 | 48 | def close() { 49 | dataInput.close() 50 | } 51 | } 52 | 53 | private[gdb] class GDBIndexIterator(dataInput: DataInput, 54 | startID: Int, 55 | maxRows: Int, 56 | indexSize: Int 57 | ) extends Iterator[IndexInfo] with Logging with Serializable { 58 | 59 | private val indexInfo = IndexInfo(0, 0) 60 | private val bytes = new Array[Byte](indexSize) 61 | private val byteBuffer = ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN) 62 | 63 | private var objectID = startID 64 | private var nextRow = 0 65 | 66 | def hasNext() = nextRow < maxRows 67 | 68 | def next() = { 69 | // log.info(s"next::nextRow=$nextRow maxRows=$maxRows") 70 | nextRow += 1 71 | 72 | objectID += 1 73 | indexInfo.objectID = objectID 74 | 75 | byteBuffer.clear 76 | dataInput.readFully(bytes) 77 | indexInfo.seek = byteBuffer.getInt 78 | 79 | indexInfo 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/GDBRDD.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import org.apache.hadoop.conf.Configuration 4 | import org.apache.spark.annotation.DeveloperApi 5 | import org.apache.spark.rdd.RDD 6 | import org.apache.spark.sql.Row 7 | import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} 8 | 9 | /** 10 | */ 11 | case class GDBRDD(@transient sc: SparkContext, gdbPath: String, gdbName: String, numPartitions: Int) extends RDD[Row](sc, Nil) with Logging { 12 | 13 | @DeveloperApi 14 | override def compute(partition: Partition, context: TaskContext): Iterator[Row] = { 15 | val part = partition.asInstanceOf[GDBPartition] 16 | val hadoopConf = if (sc == null) new Configuration() else sc.hadoopConfiguration 17 | val index = GDBIndex(gdbPath, part.hexName, hadoopConf) 18 | val table = GDBTable(gdbPath, part.hexName, hadoopConf) 19 | context.addTaskCompletionListener(context => { 20 | table.close() 21 | index.close() 22 | }) 23 | table.rowIterator(index, part.startAtRow, part.numRowsToRead) 24 | } 25 | 26 | override protected def getPartitions: Array[Partition] = { 27 | val hadoopConf = if (sc == null) new Configuration() else sc.hadoopConfiguration 28 | GDBTable.findTable(gdbPath, gdbName, hadoopConf) match { 29 | case Some(catTab) => { 30 | val index = GDBIndex(gdbPath, catTab.hexName, hadoopConf) 31 | try { 32 | val numRows = index.numRows 33 | val numRowsPerPartition = (numRows.toDouble / numPartitions).ceil.toInt 34 | var startAtRow = 0 35 | (0 until numPartitions).map(i => { 36 | val endAtRow = startAtRow + numRowsPerPartition 37 | val numRowsToRead = if (endAtRow <= numRows) numRowsPerPartition else numRows - startAtRow 38 | val gdbPartition = GDBPartition(i, catTab.hexName, startAtRow, numRowsToRead) 39 | startAtRow += numRowsToRead 40 | 
gdbPartition 41 | }).toArray 42 | } finally { 43 | index.close() 44 | } 45 | } 46 | case _ => { 47 | log.error(s"Cannot find '$gdbName' in $gdbPath, creating an empty array of Partitions !") 48 | Array.empty[Partition] 49 | } 50 | } 51 | } 52 | } 53 | 54 | private[this] case class GDBPartition(m_index: Int, 55 | val hexName: String, 56 | val startAtRow: Int, 57 | val numRowsToRead: Int 58 | ) extends Partition { 59 | override def index = m_index 60 | } 61 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/GDBRelation.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import org.apache.spark.Logging 4 | import org.apache.spark.rdd.RDD 5 | import org.apache.spark.sql.sources.{BaseRelation, TableScan} 6 | import org.apache.spark.sql.types._ 7 | import org.apache.spark.sql.{Row, SQLContext} 8 | 9 | /** 10 | */ 11 | case class GDBRelation(gdbPath: String, gdbName: String, numPartition: Int) 12 | (@transient val sqlContext: SQLContext) 13 | extends BaseRelation with Logging with TableScan { 14 | 15 | override val schema = inferSchema() 16 | 17 | private def inferSchema() = { 18 | val sc = sqlContext.sparkContext 19 | GDBTable.findTable(gdbPath, gdbName, sc.hadoopConfiguration) match { 20 | case Some(catTab) => { 21 | val table = GDBTable(gdbPath, catTab.hexName, sc.hadoopConfiguration) 22 | try { 23 | table.schema() 24 | } finally { 25 | table.close() 26 | } 27 | } 28 | case _ => { 29 | log.error(s"Cannot find '$gdbName' in $gdbPath, creating an empty schema !") 30 | StructType(Seq.empty[StructField]) 31 | } 32 | } 33 | } 34 | 35 | override def buildScan(): RDD[Row] = { 36 | GDBRDD(sqlContext.sparkContext, gdbPath, gdbName, numPartition) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/GDBRowIterator.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import org.apache.spark.sql.Row 4 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema 5 | import org.apache.spark.sql.types.StructType 6 | 7 | /** 8 | */ 9 | class GDBRowIterator(indexIter: Iterator[IndexInfo], dataBuffer: DataBuffer, fields: Array[Field], schema: StructType) 10 | extends Iterator[Row] with Serializable { 11 | 12 | val numFieldsWithNullAllowed = fields.count(_.nullable) 13 | val nullValueMasks = new Array[Byte]((numFieldsWithNullAllowed / 8.0).ceil.toInt) 14 | 15 | def hasNext() = indexIter.hasNext 16 | 17 | def next() = { 18 | val index = indexIter.next() 19 | val numBytes = dataBuffer.seek(index.seek).readBytes(4).getInt 20 | val byteBuffer = dataBuffer.readBytes(numBytes) 21 | 0 until nullValueMasks.length foreach (nullValueMasks(_) = byteBuffer.get) 22 | var bit = 0 23 | val values = fields.map(field => { 24 | if (field.nullable) { 25 | val i = bit >> 3 26 | val m = 1 << (bit & 7) 27 | bit += 1 28 | if ((nullValueMasks(i) & m) == 0) { 29 | field.readValue(byteBuffer, index.objectID) 30 | } 31 | else { 32 | null // TODO - Do not like null here - but...it is nullable ! 
33 | } 34 | } else { 35 | field.readValue(byteBuffer, index.objectID) 36 | } 37 | } 38 | ) 39 | new GenericRowWithSchema(values, schema) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/GDBTableScanWithNullValues.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import scala.collection.mutable 4 | 5 | /** 6 | */ 7 | class GDBTableScanWithNullValues(dataBuffer: DataBuffer, fields: Seq[Field], maxRows: Int, startID: Int = 0) 8 | extends Iterator[Map[String, Any]] with Serializable { 9 | 10 | val numFieldsWithNullAllowed = fields.count(_.nullable) 11 | val nullValueIndicators = new Array[Byte]((numFieldsWithNullAllowed / 8.0).ceil.toInt) 12 | 13 | var nextRow = 0 14 | var objectID = startID 15 | 16 | def hasNext() = nextRow < maxRows 17 | 18 | def next() = { 19 | nextRow += 1 20 | objectID += 1 21 | val numBytes = dataBuffer.readBytes(4).getInt 22 | val byteBuffer = dataBuffer.readBytes(numBytes) 23 | 0 until nullValueIndicators.length foreach (nullValueIndicators(_) = byteBuffer.get) 24 | var bit = 0 25 | val map = mutable.Map[String, Any]() 26 | fields.foreach(field => { 27 | if (field.nullable) { 28 | val i = bit >> 3 29 | val m = 1 << (bit & 7) 30 | bit += 1 31 | if ((nullValueIndicators(i) & m) == 0) { 32 | map(field.name) = field.readValue(byteBuffer, objectID) 33 | } 34 | } else { 35 | map(field.name) = field.readValue(byteBuffer, objectID) 36 | } 37 | } 38 | ) 39 | map.toMap 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/GDBTableSeekWithNoNullValues.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | /** 4 | */ 5 | class GDBTableSeekWithNoNullValues(dataBuffer: DataBuffer, fields: Seq[Field], indexIter: Iterator[IndexInfo]) 6 | extends Iterator[Map[String, Any]] with Serializable { 7 | 8 | def hasNext() = indexIter.hasNext 9 | 10 | def next() = { 11 | val index = indexIter.next() 12 | val numBytes = dataBuffer.seek(index.seek).readBytes(4).getInt 13 | val byteBuffer = dataBuffer.readBytes(numBytes) 14 | fields.map(_.readTuple(byteBuffer, index.objectID)).toMap 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/GDBTableSeekWithNullValues.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import scala.collection.mutable 4 | 5 | /** 6 | */ 7 | class GDBTableSeekWithNullValues(dataBuffer: DataBuffer, 8 | fields: Seq[Field], 9 | numFieldsWithNullAllowed: Int, 10 | indexIter: Iterator[IndexInfo]) 11 | extends Iterator[Map[String, Any]] with Serializable { 12 | 13 | private val nullValueIndicators = new Array[Byte]((numFieldsWithNullAllowed / 8.0).ceil.toInt) 14 | 15 | def hasNext() = indexIter.hasNext 16 | 17 | def next() = { 18 | val index = indexIter.next() 19 | val numBytes = dataBuffer.seek(index.seek).readBytes(4).getInt 20 | val byteBuffer = dataBuffer.readBytes(numBytes) 21 | 0 until nullValueIndicators.length foreach (nullValueIndicators(_) = byteBuffer.get) 22 | var bit = 0 23 | val map = mutable.Map[String, Any]() 24 | fields.foreach(field => { 25 | if (field.nullable) { 26 | val i = bit >> 3 27 | val m = 1 << (bit & 7) 28 | bit += 1 29 | if ((nullValueIndicators(i) & m) == 0) { 30 | map(field.name) = field.readValue(byteBuffer, 
index.objectID) 31 | } 32 | } else { 33 | map(field.name) = field.readValue(byteBuffer, index.objectID) 34 | } 35 | } 36 | ) 37 | map.toMap 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/IndexInfo.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | /** 4 | */ 5 | case class IndexInfo(var objectID: Int, var seek: Int) { 6 | def isSeekable = seek > 0 7 | } 8 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/gdb/package.scala: -------------------------------------------------------------------------------- 1 | package com.esri 2 | 3 | import java.nio.ByteBuffer 4 | 5 | import org.apache.spark.SparkContext 6 | import org.apache.spark.sql.SQLContext 7 | 8 | /** 9 | */ 10 | package object gdb { 11 | 12 | implicit class ByteBufferImplicits(byteBuffer: ByteBuffer) { 13 | 14 | implicit def getVarUInt() = { 15 | var shift = 7 16 | var b: Long = byteBuffer.get 17 | var ret = b & 0x7FL 18 | var old = ret 19 | while ((b & 0x80L) != 0) { 20 | b = byteBuffer.get 21 | ret = ((b & 0x7FL) << shift) | old 22 | old = ret 23 | shift += 7 24 | } 25 | ret 26 | } 27 | 28 | implicit def getVarInt() = { 29 | var shift = 7 30 | var b: Long = byteBuffer.get 31 | val isNeg = (b & 0x40L) != 0 32 | var ret = b & 0x3FL 33 | var old = ret 34 | while ((b & 0x80L) != 0) { 35 | b = byteBuffer.get 36 | ret = ((b & 0x7FL) << (shift - 1)) | old 37 | old = ret 38 | shift += 7 39 | } 40 | if (isNeg) -ret else ret 41 | } 42 | } 43 | 44 | implicit class SparkContextImplicits(sc: SparkContext) { 45 | implicit def gdbFile(path: String, name: String, numPartitions: Int = 8) = { 46 | GDBRDD(sc, path, name, numPartitions) 47 | } 48 | } 49 | 50 | implicit class SQLContextImplicits(sqlContext: SQLContext) extends Serializable { 51 | implicit def gdbFile(path: String, name: String, numPartitions: Int = 8) = { 52 | sqlContext.baseRelationToDataFrame(GDBRelation(path, name, numPartitions)(sqlContext)) 53 | } 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PointMType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import com.esri.core.geometry._ 4 | import org.apache.spark.sql.types.SQLUserDefinedType 5 | 6 | /** 7 | */ 8 | @SQLUserDefinedType(udt = classOf[PointMUDT]) 9 | class PointMType(val x: Double = 0.0, val y: Double = 0.0, val m: Double = 0.0) extends SpatialType { 10 | 11 | /*@transient lazy override val*/ def asGeometry() = asPoint() 12 | 13 | def asPoint() = { 14 | val p = new Point(x, y) 15 | p.setM(m) 16 | p 17 | } 18 | 19 | def ==(that: PointMType) = this.x == that.x && this.y == that.y && this.m == that.m 20 | 21 | override def equals(other: Any): Boolean = other match { 22 | case that: PointMType => this == that 23 | case _ => false 24 | } 25 | 26 | override def hashCode(): Int = { 27 | Seq(x, y, m).foldLeft(0)((a, b) => { 28 | val bits = java.lang.Double.doubleToLongBits(b) 29 | 31 * a + (bits ^ (bits >>> 32)).toInt 30 | }) 31 | } 32 | 33 | override def toString = s"PointMType($x,$y,$m)" 34 | 35 | } 36 | 37 | object PointMType { 38 | def apply(geometry: Geometry) = geometry match { 39 | case point: Point => new PointMType(point.getX, point.getY, point.getM) 40 | case _ => throw new RuntimeException(s"Cannot construct PointMType from ${geometry.toString}") 41 | 42 | 
} 43 | 44 | def apply(x: Double, y: Double, m: Double) = new PointMType(x, y, m) 45 | 46 | def unapply(p: PointMType) = Some((p.x, p.y, p.m)) 47 | } 48 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PointMUDT.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | import org.apache.spark.sql.catalyst.expressions.GenericMutableRow 5 | import org.apache.spark.sql.types._ 6 | 7 | /** 8 | */ 9 | class PointMUDT extends UserDefinedType[PointMType] { 10 | 11 | override def sqlType: DataType = StructType(Seq( 12 | StructField("x", DoubleType, false), 13 | StructField("y", DoubleType, false), 14 | StructField("m", DoubleType, false) 15 | )) 16 | 17 | override def serialize(obj: Any): InternalRow = { 18 | obj match { 19 | case PointMType(x, y, m) => { 20 | val row = new GenericMutableRow(3) 21 | row.setDouble(0, x) 22 | row.setDouble(1, y) 23 | row.setDouble(2, m) 24 | row 25 | } 26 | } 27 | } 28 | 29 | override def deserialize(datum: Any): PointMType = { 30 | datum match { 31 | case row: InternalRow => PointMType(row.getDouble(0), row.getDouble(1), row.getDouble(2)) 32 | } 33 | } 34 | 35 | override def userClass: Class[PointMType] = classOf[PointMType] 36 | 37 | override def pyUDT: String = "com.esri.udt.PointMUDT" 38 | 39 | override def typeName: String = "pointM" 40 | 41 | override def equals(o: Any): Boolean = { 42 | o match { 43 | case v: PointMUDT => true 44 | case _ => false 45 | } 46 | } 47 | 48 | // see [SPARK-8647], this achieves the needed constant hash code without constant no. 49 | override def hashCode(): Int = classOf[PointMUDT].getName.hashCode() 50 | 51 | override def asNullable: PointMUDT = this 52 | 53 | } -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PointType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import com.esri.core.geometry._ 4 | import org.apache.spark.sql.types.SQLUserDefinedType 5 | 6 | /** 7 | */ 8 | @SQLUserDefinedType(udt = classOf[PointUDT]) 9 | class PointType(val x: Double = 0.0, val y: Double = 0.0) extends SpatialType { 10 | 11 | /*@transient lazy override val*/ def asGeometry() = new Point(x, y) 12 | 13 | def ==(that: PointType) = this.x == that.x && this.y == that.y 14 | 15 | override def equals(other: Any): Boolean = other match { 16 | case that: PointType => this == that 17 | case _ => false 18 | } 19 | 20 | override def hashCode(): Int = { 21 | Seq(x, y).foldLeft(0)((a, b) => { 22 | val bits = java.lang.Double.doubleToLongBits(b) 23 | 31 * a + (bits ^ (bits >>> 32)).toInt 24 | }) 25 | } 26 | 27 | override def toString = s"PointType($x, $y)" 28 | 29 | } 30 | 31 | object PointType { 32 | def apply(geometry: Geometry) = geometry match { 33 | case point: Point => new PointType(point.getX, point.getY) 34 | case line: Line => { 35 | val x = (line.getStartX + line.getEndX) / 2.0 36 | val y = (line.getStartY + line.getEndX) / 2.0 37 | new PointType(x, y) 38 | } 39 | case polyline: Polyline => { 40 | polyline.getPointCount match { 41 | case 2 => { 42 | val arr = polyline.getCoordinates2D 43 | val p0 = arr(0) 44 | val p1 = arr(1) 45 | new PointType((p0.x + p1.x) * 0.5, (p0.y + p1.y) * 0.5) 46 | } 47 | case n => { 48 | val xy = polyline.getXY(n / 2) 49 | new PointType(xy.x, xy.y) 50 | } 51 | } 52 | } 53 | case _ => { 54 | // TODO - Cheap way out 
here !!! 55 | val envp = new Envelope2D() 56 | geometry.queryEnvelope2D(envp) 57 | new PointType(envp.getCenterX, envp.getCenterY) 58 | } 59 | } 60 | 61 | def apply(x: Double, y: Double) = new PointType(x, y) 62 | 63 | def unapply(p: PointType) = Some((p.x, p.y)) 64 | } 65 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PointUDT.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | import org.apache.spark.sql.catalyst.expressions.GenericMutableRow 5 | import org.apache.spark.sql.types._ 6 | 7 | /** 8 | */ 9 | class PointUDT extends UserDefinedType[PointType] { 10 | 11 | override def sqlType: DataType = StructType(Seq( 12 | StructField("x", DoubleType, false), 13 | StructField("y", DoubleType, false) 14 | )) 15 | 16 | override def serialize(obj: Any): InternalRow = { 17 | obj match { 18 | case PointType(x, y) => { 19 | val row = new GenericMutableRow(2) 20 | row.setDouble(0, x) 21 | row.setDouble(1, y) 22 | row 23 | } 24 | } 25 | } 26 | 27 | override def deserialize(datum: Any): PointType = { 28 | datum match { 29 | case row: InternalRow => PointType(row.getDouble(0), row.getDouble(1)) 30 | } 31 | } 32 | 33 | override def userClass: Class[PointType] = classOf[PointType] 34 | 35 | override def pyUDT: String = "com.esri.udt.PointUDT" 36 | 37 | override def typeName: String = "point" 38 | 39 | override def equals(o: Any): Boolean = { 40 | o match { 41 | case v: PointUDT => true 42 | case _ => false 43 | } 44 | } 45 | 46 | // see [SPARK-8647], this achieves the needed constant hash code without constant no. 47 | override def hashCode(): Int = classOf[PointUDT].getName.hashCode() 48 | 49 | override def asNullable: PointUDT = this 50 | 51 | } -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PointZMType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import com.esri.core.geometry._ 4 | import org.apache.spark.sql.types.SQLUserDefinedType 5 | 6 | /** 7 | */ 8 | @SQLUserDefinedType(udt = classOf[PointZMUDT]) 9 | class PointZMType(val x: Double = 0.0, val y: Double = 0.0, val z: Double = 0.0, val m: Double = 0.0) extends SpatialType { 10 | 11 | /*@transient lazy override val*/ def asGeometry() = asPoint() 12 | 13 | def asPoint() = { 14 | val p = new Point(x, y, z) 15 | p.setM(m) 16 | p 17 | } 18 | 19 | def ==(that: PointZMType) = this.x == that.x && this.y == that.y && this.z == that.z && this.m == that.m 20 | 21 | override def equals(other: Any): Boolean = other match { 22 | case that: PointZMType => this == that 23 | case _ => false 24 | } 25 | 26 | override def hashCode(): Int = { 27 | Seq(x, y, z, m).foldLeft(0)((a, b) => { 28 | val bits = java.lang.Double.doubleToLongBits(b) 29 | 31 * a + (bits ^ (bits >>> 32)).toInt 30 | }) 31 | } 32 | 33 | override def toString = s"PointZMType($x,$y,$z,$m)" 34 | 35 | } 36 | 37 | object PointZMType { 38 | def apply(geometry: Geometry) = geometry match { 39 | case point: Point => new PointZMType(point.getX, point.getY, point.getZ, point.getM) 40 | case _ => throw new RuntimeException(s"Cannot construct PointZMType from ${geometry.toString}") 41 | 42 | } 43 | 44 | def apply(x: Double, y: Double, z: Double, m: Double) = new PointZMType(x, y, z, m) 45 | 46 | def unapply(p: PointZMType) = Some((p.x, p.y, p.z, p.m)) 47 | } 48 | 
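Note: the point value classes above (PointType, PointZType, PointMType, PointZMType) and their companion UDTs follow Spark SQL's UserDefinedType contract, where serialize packs the fields into an InternalRow and deserialize rebuilds the value class. A minimal round-trip sketch is shown below; it is not part of the repository, the object name PointZMRoundTrip is hypothetical, and it assumes the same Spark 1.x classpath this project builds against.

// Minimal round-trip sketch for the ZM point UDT (hypothetical driver object).
import com.esri.udt.{PointZMType, PointZMUDT}

object PointZMRoundTrip extends App {
  val udt = new PointZMUDT()
  val p   = PointZMType(10.0, 20.0, 1.5, 7.0)

  // serialize packs (x, y, z, m) into an InternalRow; deserialize rebuilds the value type.
  val row  = udt.serialize(p)
  val back = udt.deserialize(row)

  // PointZMType compares field by field, so the round trip is value-equal.
  assert(p == back)
  println(back) // PointZMType(10.0,20.0,1.5,7.0)
}
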
-------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PointZMUDT.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | import org.apache.spark.sql.catalyst.expressions.GenericMutableRow 5 | import org.apache.spark.sql.types._ 6 | 7 | /** 8 | */ 9 | class PointZMUDT extends UserDefinedType[PointZMType] { 10 | 11 | override def sqlType: DataType = StructType(Seq( 12 | StructField("x", DoubleType, false), 13 | StructField("y", DoubleType, false), 14 | StructField("z", DoubleType, false), 15 | StructField("m", DoubleType, false) 16 | )) 17 | 18 | override def serialize(obj: Any): InternalRow = { 19 | obj match { 20 | case PointZMType(x, y, z, m) => { 21 | val row = new GenericMutableRow(4) 22 | row.setDouble(0, x) 23 | row.setDouble(1, y) 24 | row.setDouble(2, z) 25 | row.setDouble(3, m) 26 | row 27 | } 28 | } 29 | } 30 | 31 | override def deserialize(datum: Any): PointZMType = { 32 | datum match { 33 | case row: InternalRow => PointZMType(row.getDouble(0), row.getDouble(1), row.getDouble(2), row.getDouble(3)) 34 | } 35 | } 36 | 37 | override def userClass: Class[PointZMType] = classOf[PointZMType] 38 | 39 | override def pyUDT: String = "com.esri.udt.PointZMUDT" 40 | 41 | override def typeName: String = "pointZM" 42 | 43 | override def equals(o: Any): Boolean = { 44 | o match { 45 | case v: PointZMUDT => true 46 | case _ => false 47 | } 48 | } 49 | 50 | // see [SPARK-8647], this achieves the needed constant hash code without constant no. 51 | override def hashCode(): Int = classOf[PointZMUDT].getName.hashCode() 52 | 53 | override def asNullable: PointZMUDT = this 54 | 55 | } -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PointZType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import com.esri.core.geometry._ 4 | import org.apache.spark.sql.types.SQLUserDefinedType 5 | 6 | /** 7 | */ 8 | @SQLUserDefinedType(udt = classOf[PointZUDT]) 9 | class PointZType(val x: Double = 0.0, val y: Double = 0.0, val z: Double = 0.0) extends SpatialType { 10 | 11 | /*@transient lazy override val*/ def asGeometry() = asPoint() 12 | 13 | def asPoint() = { 14 | new Point(x, y, z) 15 | } 16 | 17 | def ==(that: PointZType) = this.x == that.x && this.y == that.y && this.z == that.z 18 | 19 | override def equals(other: Any): Boolean = other match { 20 | case that: PointZType => this == that 21 | case _ => false 22 | } 23 | 24 | override def hashCode(): Int = { 25 | Seq(x, y, z).foldLeft(0)((a, b) => { 26 | val bits = java.lang.Double.doubleToLongBits(b) 27 | 31 * a + (bits ^ (bits >>> 32)).toInt 28 | }) 29 | } 30 | 31 | override def toString = s"PointZType($x,$y,$z)" 32 | 33 | } 34 | 35 | object PointZType { 36 | def apply(geometry: Geometry) = geometry match { 37 | case point: Point => new PointZType(point.getX, point.getY, point.getM) 38 | case _ => throw new RuntimeException(s"Cannot construct PointZType from ${geometry.toString}") 39 | } 40 | 41 | def apply(x: Double, y: Double, z: Double) = new PointZType(x, y, z) 42 | 43 | def unapply(p: PointZType) = Some((p.x, p.y, p.z)) 44 | } 45 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PointZUDT.scala: -------------------------------------------------------------------------------- 1 | package 
com.esri.udt 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | import org.apache.spark.sql.catalyst.expressions.GenericMutableRow 5 | import org.apache.spark.sql.types._ 6 | 7 | /** 8 | */ 9 | class PointZUDT extends UserDefinedType[PointZType] { 10 | 11 | override def sqlType: DataType = StructType(Seq( 12 | StructField("x", DoubleType, false), 13 | StructField("y", DoubleType, false), 14 | StructField("z", DoubleType, false) 15 | )) 16 | 17 | override def serialize(obj: Any): InternalRow = { 18 | obj match { 19 | case PointZType(x, y, z) => { 20 | val row = new GenericMutableRow(3) 21 | row.setDouble(0, x) 22 | row.setDouble(1, y) 23 | row.setDouble(2, z) 24 | row 25 | } 26 | } 27 | } 28 | 29 | override def deserialize(datum: Any): PointZType = { 30 | datum match { 31 | case row: InternalRow => PointZType(row.getDouble(0), row.getDouble(1), row.getDouble(2)) 32 | } 33 | } 34 | 35 | override def userClass: Class[PointZType] = classOf[PointZType] 36 | 37 | override def pyUDT: String = "com.esri.udt.PointZUDT" 38 | 39 | override def typeName: String = "pointZ" 40 | 41 | override def equals(o: Any): Boolean = { 42 | o match { 43 | case v: PointZUDT => true 44 | case _ => false 45 | } 46 | } 47 | 48 | // see [SPARK-8647], this achieves the needed constant hash code without constant no. 49 | override def hashCode(): Int = classOf[PointZUDT].getName.hashCode() 50 | 51 | override def asNullable: PointZUDT = this 52 | 53 | } -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PolyType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | /** 4 | */ 5 | abstract class PolyType(val xmin: Double, 6 | val ymin: Double, 7 | val xmax: Double, 8 | val ymax: Double, 9 | val xyNum: Array[Int], 10 | val xyArr: Array[Double] 11 | ) extends SpatialType { 12 | 13 | def ==(that: PolyType) = { 14 | xmin == that.xmin && 15 | ymin == that.ymin && 16 | xmax == that.xmax && 17 | ymax == that.ymax && 18 | xyNum.sameElements(that.xyNum) && 19 | xyArr.sameElements(that.xyArr) 20 | } 21 | 22 | override def hashCode(): Int = { 23 | Seq(xmin, ymin, xmax, ymax).foldLeft(0)((a, b) => { 24 | val bits = java.lang.Double.doubleToLongBits(b) 25 | 31 * a + (bits ^ (bits >>> 32)).toInt 26 | }) 27 | } 28 | 29 | override def equals(other: Any): Boolean = other match { 30 | case that: PolyType => this == that 31 | case _ => false 32 | } 33 | 34 | override def toString = "%s%s".format(getClass.getSimpleName, xyArr.mkString("(", ",", ")")) 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PolyUDT.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | import org.apache.spark.sql.catalyst.expressions.GenericMutableRow 5 | import org.apache.spark.sql.types._ 6 | 7 | /** 8 | */ 9 | abstract class PolyUDT[T] extends UserDefinedType[T] { 10 | 11 | override def sqlType: DataType = StructType(Seq( 12 | StructField("xmin", DoubleType, false), 13 | StructField("ymin", DoubleType, false), 14 | StructField("xmax", DoubleType, false), 15 | StructField("ymax", DoubleType, false), 16 | StructField("xyNum", ArrayType(IntegerType, false), false), 17 | StructField("xyArr", ArrayType(DoubleType, false), false) 18 | )) 19 | 20 | def serialize(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: 
Array[Double]) = { 21 | val row = new GenericMutableRow(6) 22 | row.update(0, xmin) 23 | row.update(1, ymin) 24 | row.update(2, xmax) 25 | row.update(3, ymax) 26 | // For Spark 1.6 27 | row.update(4, new org.apache.spark.sql.catalyst.util.GenericArrayData(xyNum.map(_.asInstanceOf[Any]))) 28 | row.update(5, new org.apache.spark.sql.catalyst.util.GenericArrayData(xyArr.map(_.asInstanceOf[Any]))) 29 | // Spark 1.5 30 | /* 31 | row.update(4, new GenericArrayData(xyNum.map(_.asInstanceOf[Any]))) 32 | row.update(5, new GenericArrayData(xyArr.map(_.asInstanceOf[Any]))) 33 | */ 34 | row 35 | } 36 | 37 | def deserialize(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]): T 38 | 39 | override def deserialize(datum: Any): T = { 40 | datum match { 41 | case row: InternalRow => { 42 | val xmin = row.getDouble(0) 43 | val ymin = row.getDouble(1) 44 | val xmax = row.getDouble(2) 45 | val ymax = row.getDouble(3) 46 | val xyNum = row.getArray(4).toIntArray() 47 | val xyArr = row.getArray(5).toDoubleArray() 48 | deserialize(xmin, ymin, xmax, ymax, xyNum, xyArr) 49 | } 50 | } 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PolygonType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import com.esri.core.geometry.{Envelope2D, Geometry, Point2D, Polygon} 4 | import org.apache.spark.sql.types.SQLUserDefinedType 5 | 6 | /** 7 | * PolygonType 8 | * 9 | * @param xyNum each element contains the number of xy pairs to read for a part 10 | * @param xyArr sequence of xy elements 11 | */ 12 | @SQLUserDefinedType(udt = classOf[PolygonUDT]) 13 | class PolygonType(override val xmin: Double, 14 | override val ymin: Double, 15 | override val xmax: Double, 16 | override val ymax: Double, 17 | override val xyNum: Array[Int], 18 | override val xyArr: Array[Double]) 19 | extends PolyType(xmin, ymin, xmax, ymax, xyNum, xyArr) { 20 | 21 | /*@transient override lazy val*/ def asGeometry(): Geometry = asPolygon() 22 | 23 | def asPolygon() = { 24 | val polygon = new Polygon() 25 | var i = 0 26 | xyNum.foreach(p => { 27 | 0 until p foreach (n => { 28 | val x = xyArr(i) 29 | i += 1 30 | val y = xyArr(i) 31 | i += 1 32 | n match { 33 | case 0 => polygon.startPath(x, y) 34 | case _ => polygon.lineTo(x, y) 35 | } 36 | }) 37 | }) 38 | polygon.closeAllPaths() 39 | polygon 40 | } 41 | } 42 | 43 | object PolygonType { 44 | def apply(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]) = { 45 | new PolygonType(xmin, ymin, xmax, ymax, xyNum, xyArr) 46 | } 47 | 48 | def apply(geometry: Geometry) = geometry match { 49 | case polygon: Polygon => { 50 | val envp = new Envelope2D() 51 | polygon.queryEnvelope2D(envp) 52 | val pathCount = polygon.getPathCount 53 | val xyNum = (0 until pathCount).map(pathIndex => polygon.getPathSize(pathIndex)).toArray 54 | val point2D = new Point2D() 55 | val numPoints = polygon.getPointCount 56 | val xyArr = new Array[Double](numPoints * 2) 57 | var i = 0 58 | // TODO - use fold 59 | (0 until numPoints).foreach(pointIndex => { 60 | polygon.getXY(pointIndex, point2D) 61 | xyArr(i) = point2D.x 62 | i += 1 63 | xyArr(i) = point2D.y 64 | i += 1 65 | }) 66 | new PolygonType(envp.xmin, envp.ymin, envp.xmax, envp.ymax, xyNum, xyArr) 67 | } 68 | case _ => throw new RuntimeException(s"Cannot create instance of PolygonType from $geometry") 69 | } 70 | 71 | def unapply(p: PolygonType) 
= 72 | Some((p.xmin, p.ymin, p.xmax, p.ymax, p.xyNum, p.xyArr)) 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PolygonUDT.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | 5 | /** 6 | */ 7 | class PolygonUDT extends PolyUDT[PolygonType] { 8 | 9 | override def serialize(obj: Any): InternalRow = { 10 | obj match { 11 | case PolygonType(xmin, ymin, xmax, ymax, xyNum, xyArr) => { 12 | serialize(xmin, ymin, xmax, ymax, xyNum, xyArr) 13 | } 14 | } 15 | } 16 | 17 | override def deserialize(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]) = { 18 | PolygonType(xmin, ymin, xmax, ymax, xyNum, xyArr) 19 | } 20 | 21 | override def userClass = classOf[PolygonType] 22 | 23 | override def pyUDT = "com.esri.udt.PolygonUDT" 24 | 25 | override def typeName = "polygon" 26 | 27 | override def equals(o: Any) = { 28 | o match { 29 | case v: PolygonUDT => true 30 | case _ => false 31 | } 32 | } 33 | 34 | // see [SPARK-8647], this achieves the needed constant hash code without constant no. 35 | override def hashCode(): Int = classOf[PolygonUDT].getName.hashCode() 36 | 37 | override def asNullable: PolygonUDT = this 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PolylineMType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import com.esri.core.geometry._ 4 | import org.apache.spark.sql.types.SQLUserDefinedType 5 | 6 | /** 7 | * PolylineMType 8 | * 9 | * @param xyNum each element contains the number of xy pairs to read for a part 10 | * @param xyArr sequence of xy elements 11 | */ 12 | @SQLUserDefinedType(udt = classOf[PolylineMUDT]) 13 | class PolylineMType( 14 | override val xmin: Double, 15 | override val ymin: Double, 16 | override val xmax: Double, 17 | override val ymax: Double, 18 | override val xyNum: Array[Int], 19 | override val xyArr: Array[Double]) 20 | extends PolyType(xmin, ymin, xmax, ymax, xyNum, xyArr) { 21 | 22 | /*@transient override lazy val*/ def asGeometry(): Geometry = asPolyline() 23 | 24 | def asPolyline() = { 25 | val polyline = new Polyline() 26 | var i = 0 27 | xyNum.foreach(p => { 28 | 0 until p foreach (n => { 29 | val x = xyArr(i) 30 | i += 1 31 | val y = xyArr(i) 32 | i += 1 33 | val m = xyArr(i) 34 | i += 1 35 | n match { 36 | case 0 => val p = new Point(x, y); p.setM(m); polyline.startPath(p) 37 | case _ => val p = new Point(x, y); p.setM(m); polyline.lineTo(p) 38 | } 39 | }) 40 | }) 41 | polyline 42 | } 43 | } 44 | 45 | object PolylineMType { 46 | def apply(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]) = { 47 | new PolylineMType(xmin, ymin, xmax, ymax, xyNum, xyArr) 48 | } 49 | 50 | def apply(geometry: Geometry) = geometry match { 51 | case multiPath: MultiPath => { 52 | val envp = new Envelope2D() 53 | multiPath.queryEnvelope2D(envp) 54 | val pathCount = multiPath.getPathCount 55 | val xyNum = (0 until pathCount).map(pathIndex => multiPath.getPathSize(pathIndex)).toArray 56 | val numPoints = multiPath.getPointCount 57 | val point = new Point() 58 | val xyArr = new Array[Double](numPoints * 3) 59 | (0 until numPoints).foldLeft(0)((i, pointIndex) => { 60 | multiPath.getPoint(pointIndex, point) 61 | xyArr(i) = point.getX 62 | xyArr(i + 1) = 
point.getY 63 | xyArr(i + 2) = point.getM 64 | i + 3 65 | }) 66 | new PolylineMType(envp.xmin, envp.ymin, envp.xmax, envp.ymax, xyNum, xyArr) 67 | } 68 | case _ => throw new RuntimeException(s"Cannot create instance of PolylineMType from $geometry") 69 | } 70 | 71 | def unapply(p: PolylineMType) = 72 | Some((p.xmin, p.ymin, p.xmax, p.ymax, p.xyNum, p.xyArr)) 73 | } 74 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PolylineMUDT.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | 5 | /** 6 | */ 7 | class PolylineMUDT extends PolyUDT[PolylineMType] { 8 | 9 | override def serialize(obj: Any): InternalRow = { 10 | obj match { 11 | case PolylineMType(xmin, ymin, xmax, ymax, xyNum, xyArr) => serialize(xmin, ymin, xmax, ymax, xyNum, xyArr) 12 | } 13 | } 14 | 15 | override def deserialize(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]) = { 16 | PolylineMType(xmin, ymin, xmax, ymax, xyNum, xyArr) 17 | } 18 | 19 | override def userClass = classOf[PolylineMType] 20 | 21 | override def pyUDT = "com.esri.udt.PolylineMUDT" 22 | 23 | override def typeName = "polylineM" 24 | 25 | override def equals(o: Any) = { 26 | o match { 27 | case v: PolylineMUDT => true 28 | case _ => false 29 | } 30 | } 31 | 32 | // see [SPARK-8647], this achieves the needed constant hash code without constant no. 33 | override def hashCode(): Int = classOf[PolylineMUDT].getName.hashCode() 34 | 35 | override def asNullable: PolylineMUDT = this 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PolylineType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import com.esri.core.geometry._ 4 | import org.apache.spark.sql.types.SQLUserDefinedType 5 | 6 | /** 7 | * PolylineType 8 | * 9 | * @param xyNum each element contains the number of xy pairs to read for a part 10 | * @param xyArr sequence of xy elements 11 | */ 12 | @SQLUserDefinedType(udt = classOf[PolylineUDT]) 13 | class PolylineType(override val xmin: Double, 14 | override val ymin: Double, 15 | override val xmax: Double, 16 | override val ymax: Double, 17 | override val xyNum: Array[Int], 18 | override val xyArr: Array[Double]) 19 | extends PolyType(xmin, ymin, xmax, ymax, xyNum, xyArr) { 20 | 21 | /*@transient override lazy val*/ def asGeometry(): Geometry = asPolyline() 22 | 23 | def asPolyline() = { 24 | val polyline = new Polyline() 25 | var i = 0 26 | xyNum.foreach(p => { 27 | 0 until p foreach (n => { 28 | val x = xyArr(i) 29 | i += 1 30 | val y = xyArr(i) 31 | i += 1 32 | n match { 33 | case 0 => polyline.startPath(x, y) 34 | case _ => polyline.lineTo(x, y) 35 | } 36 | }) 37 | }) 38 | polyline 39 | } 40 | 41 | } 42 | 43 | object PolylineType { 44 | def apply(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]) = { 45 | new PolylineType(xmin, ymin, xmax, ymax, xyNum, xyArr) 46 | } 47 | 48 | def apply(geometry: Geometry) = geometry match { 49 | case line: Line => { 50 | val envp = new Envelope2D() 51 | line.queryEnvelope2D(envp) 52 | val xyNum = Array(1) 53 | val xyArr = Array(line.getEndX, line.getStartY, line.getEndX, line.getEndY) 54 | new PolylineType(envp.xmin, envp.ymin, envp.xmax, envp.ymax, xyNum, xyArr) 55 | } 56 | case multiPath: MultiPath => { 
57 | val envp = new Envelope2D() 58 | multiPath.queryEnvelope2D(envp) 59 | val pathCount = multiPath.getPathCount 60 | val xyNum = (0 until pathCount).map(pathIndex => multiPath.getPathSize(pathIndex)).toArray 61 | val point2D = new Point2D() 62 | val numPoints = multiPath.getPointCount 63 | val xyArr = new Array[Double](numPoints * 2) 64 | var i = 0 65 | // TODO - use fold 66 | (0 until numPoints).foreach(pointIndex => { 67 | multiPath.getXY(pointIndex, point2D) 68 | xyArr(i) = point2D.x 69 | i += 1 70 | xyArr(i) = point2D.y 71 | i += 1 72 | }) 73 | new PolylineType(envp.xmin, envp.ymin, envp.xmax, envp.ymax, xyNum, xyArr) 74 | } 75 | case _ => throw new RuntimeException(s"Cannot create instance of PolylineType from $geometry") 76 | } 77 | 78 | def unapply(p: PolylineType) = 79 | Some((p.xmin, p.ymin, p.xmax, p.ymax, p.xyNum, p.xyArr)) 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/PolylineUDT.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.apache.spark.sql.catalyst.InternalRow 4 | 5 | /** 6 | */ 7 | class PolylineUDT extends PolyUDT[PolylineType] { 8 | 9 | override def serialize(obj: Any): InternalRow = { 10 | obj match { 11 | case PolylineType(xmin, ymin, xmax, ymax, xyNum, xyArr) => serialize(xmin, ymin, xmax, ymax, xyNum, xyArr) 12 | } 13 | } 14 | 15 | override def deserialize(xmin: Double, ymin: Double, xmax: Double, ymax: Double, xyNum: Array[Int], xyArr: Array[Double]) = { 16 | PolylineType(xmin, ymin, xmax, ymax, xyNum, xyArr) 17 | } 18 | 19 | override def userClass = classOf[PolylineType] 20 | 21 | override def pyUDT = "com.esri.udt.PolylineUDT" 22 | 23 | override def typeName = "polyline" 24 | 25 | override def equals(o: Any) = { 26 | o match { 27 | case v: PolylineUDT => true 28 | case _ => false 29 | } 30 | } 31 | 32 | // see [SPARK-8647], this achieves the needed constant hash code without constant no. 33 | override def hashCode(): Int = classOf[PolylineUDT].getName.hashCode() 34 | 35 | override def asNullable: PolylineUDT = this 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/Segment.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | /** 4 | */ 5 | case class Segment(val ax: Double, val ay: Double, val am: Double, val bx: Double, val by: Double, val bm: Double) -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/SegmentIterator.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | /** 4 | */ 5 | case class SegmentIterator(poly: PolyType) extends Iterator[Segment] { 6 | 7 | private var i: Int = 0 8 | private val m = poly.xyNum.length 9 | 10 | private var j: Int = 1 11 | private var n: Int = if (poly.xyNum.isEmpty) 0 else poly.xyNum.head 12 | 13 | private var a: Int = 0 14 | private var b: Int = 3 15 | 16 | private var hasMore = i < m && j < n 17 | 18 | override def hasNext() = { 19 | hasMore 20 | } 21 | 22 | override def next() = { 23 | val ax = poly.xyArr(a) 24 | val ay = poly.xyArr(a + 1) 25 | val am = poly.xyArr(a + 2) 26 | 27 | val bx = poly.xyArr(b) 28 | val by = poly.xyArr(b + 1) 29 | val bm = poly.xyArr(b + 2) 30 | 31 | a += 3 32 | b += 3 33 | j += 1 34 | 35 | // End of part ? 36 | if (j == n) { 37 | j = 1 38 | i += 1 39 | // End of all parts ? 
40 | if (i < m) { 41 | n = poly.xyNum(i) 42 | a += 3 43 | b += 3 44 | } 45 | else { 46 | hasMore = false 47 | } 48 | } 49 | 50 | Segment(ax, ay, am, bx, by, bm) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/scala/com/esri/udt/SpatialType.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import com.esri.core.geometry.Geometry 4 | 5 | trait SpatialType extends Serializable { 6 | def asGeometry(): Geometry 7 | } 8 | -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000001.TablesByName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000001.TablesByName.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000001.freelist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000001.freelist -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000001.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000001.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000001.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000001.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000001.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000001.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000002.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000002.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000002.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000002.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000003.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000003.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000003.gdbtable: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000003.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000003.gdbtablx: -------------------------------------------------------------------------------- 1 | $  -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000004.CatItemsByPhysicalName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000004.CatItemsByPhysicalName.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000004.CatItemsByType.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000004.CatItemsByType.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000004.FDO_UUID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000004.FDO_UUID.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000004.freelist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000004.freelist -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000004.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000004.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000004.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000004.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000004.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000004.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000004.spx: -------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000005.CatItemTypesByName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000005.CatItemTypesByName.atx 
-------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000005.CatItemTypesByParentTypeID.atx: -------------------------------------------------------------------------------- 1 | "  2 | !" {00000000-0000-0000-0000-000000000000}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{28DA9E89-FF80-4D6D-8926-4EE2B161677D}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{77292603-930F-475D-AE4F-B8970F42F394}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D4912162-3413-476E-9DA4-2AEFBBC16939}{FFD09C28-FE70-4E25-907C-AF8E8A5EC5F3}&" -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000005.CatItemTypesByUUID.atx: -------------------------------------------------------------------------------- 1 | ""  ! 2 |  {28DA9E89-FF80-4D6D-8926-4EE2B161677D}{35B601F7-45CE-4AFF-ADB7-7702D3839B12}{4ED4A58E-621F-4043-95ED-850FBA45FCBC}{5B966567-FB87-4DDE-938B-B4B37423539D}{5ED667A3-9CA9-44A2-8029-D95BF23704B9}{70737809-852C-4A03-9E22-2CECEA5B9BFA}{73718A66-AFB9-4B88-A551-CFFA0AE12620}{74737149-DCB5-4257-8904-B9724E32A530}{76357537-3364-48AF-A4BE-783C7C28B5CB}{767152D3-ED66-4325-8774-420D46674E07}{77292603-930F-475D-AE4F-B8970F42F394}{7771FC7D-A38B-4FD3-8225-639D17E9A131}{787BEA35-4A86-494F-BB48-500B96145B58}{8405ADD5-8DF8-4227-8FAC-3FCADE073386}{8637F1ED-8C04-4866-A44A-1CB8288B3C63}{8C368B12-A12E-4C7E-9638-C9C64E69E98F}{A300008D-0CEA-4F6A-9DFA-46AF829A3DF2}{A3803369-5FC2-4963-BAE0-13EFFC09DD73}{B606A7E1-FA5B-439C-849C-6E9C2481537B}{C29DA988-8C3E-45F7-8B5C-18E51EE7BEB4}{C673FE0F-7280-404F-8532-20755DD8FC06}{CD06BC3B-789D-4C51-AAFA-A467912B8965}{D4912162-3413-476E-9DA4-2AEFBBC16939}{D5A40288-029E-4766-8C81-DE3F61129371}{D86502F9-9758-45C6-9D23-6DD1A0107B47}{D98421EB-D582-4713-9484-43304D0810F6}{DB1B697A-3BB6-426A-98A2-6EE7A4C6AED3}{DC64B6E4-DC0F-43BD-B4F5-F22385DCF055}{DC9EF677-1AA3-45A7-8ACD-303A5202D0DC}{E6302665-416B-44FA-BE33-4E15916BA101}{F3783E6F-65CA-4514-8315-CE3985DAD3B1}{F8413DCB-2248-4935-BFE9-315F397E5110}{FBDD7DD6-4A25-40B7-9A1A-ECC3D1172447}{FFD09C28-FE70-4E25-907C-AF8E8A5EC5F3}&" -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000005.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000005.gdbindexes -------------------------------------------------------------------------------- 
/src/test/resources/Test.gdb/a00000005.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000005.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000005.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000005.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000006.CatRelsByDestinationID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000006.CatRelsByDestinationID.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000006.CatRelsByOriginID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000006.CatRelsByOriginID.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000006.CatRelsByType.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000006.CatRelsByType.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000006.FDO_UUID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000006.FDO_UUID.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000006.freelist: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000006.freelist -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000006.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000006.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000006.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000006.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000006.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000006.gdbtablx 
-------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.CatRelTypesByBackwardLabel.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.CatRelTypesByBackwardLabel.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.CatRelTypesByDestItemTypeID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.CatRelTypesByDestItemTypeID.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.CatRelTypesByForwardLabel.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.CatRelTypesByForwardLabel.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.CatRelTypesByName.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.CatRelTypesByName.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.CatRelTypesByOriginItemTypeID.atx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.CatRelTypesByUUID.atx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.CatRelTypesByUUID.atx
-------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000007.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000007.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000009.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000009.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000009.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000009.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000009.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000009.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000009.spx: -------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000a.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000a.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000a.gdbtable:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000a.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000a.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000a.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000a.spx: -------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000d.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000d.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000d.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000d.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000d.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000d.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000d.spx: -------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000e.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000e.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000e.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000e.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000e.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000e.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000e.spx: -------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000f.gdbindexes: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000f.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000f.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000f.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000f.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a0000000f.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a0000000f.spx: -------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000010.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000010.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000010.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000010.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000010.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000010.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000010.spx: -------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000011.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000011.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000011.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000011.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000011.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000011.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000011.spx: 
-------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000012.gdbindexes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000012.gdbindexes -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000012.gdbtable: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000012.gdbtable -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000012.gdbtablx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/a00000012.gdbtablx -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/a00000012.spx: -------------------------------------------------------------------------------- 1 | @ -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/gdb: -------------------------------------------------------------------------------- 1 |  -------------------------------------------------------------------------------- /src/test/resources/Test.gdb/timestamps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mraad/spark-gdb/a4bcd29a7f435d7b1c7b8ac18eebd29cb7b7de62/src/test/resources/Test.gdb/timestamps -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # Set everything to be logged to the console 2 | log4j.rootCategory=WARN, console 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.out 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss} %p %c{1}: %m%n 7 | 8 | # Settings to quiet third party logs that are too verbose 9 | log4j.logger.org.eclipse.jetty=WARN 10 | log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR 11 | log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=WARN 12 | log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=WARN 13 | -------------------------------------------------------------------------------- /src/test/scala/com/esri/app/GDBApp.scala: -------------------------------------------------------------------------------- 1 | package com.esri.app 2 | 3 | import com.esri.gdb.{GDBIndex, GDBTable} 4 | import resource._ 5 | 6 | object GDBApp extends App { 7 | val gdb = "/Users/mraad_admin/Share/VZ/OnlyHighways.gdb" 8 | 9 | GDBTable.listTables(gdb).foreach(println) 10 | 11 | doCat 12 | 13 | def doCat: Unit = { 14 | GDBTable.findTable(gdb, "Interstates") 15 | .foreach(catTab => { 16 | for { 17 | index <- managed(GDBIndex(gdb, catTab.hexName)) 18 | table <- managed(GDBTable(gdb, catTab.hexName)) 19 | } { 20 | val count = table 21 | .seekIterator(index.iterator()) 22 | .count(m => true) 23 | println(count) 24 | } 25 | 
}) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/scala/com/esri/app/Main.scala: -------------------------------------------------------------------------------- 1 | package com.esri.app 2 | 3 | import com.esri.core.geometry.Polyline 4 | import com.esri.udt.{PointType, PolylineType} 5 | import org.apache.spark.sql.{SQLContext, SaveMode} 6 | import org.apache.spark.{Logging, SparkConf, SparkContext} 7 | 8 | /** 9 | */ 10 | object Main extends App with Logging { 11 | 12 | val (path, name) = args.length match { 13 | case 2 => (args(0), args(1)) 14 | case _ => throw new IllegalArgumentException("Missing path and name") 15 | } 16 | val conf = new SparkConf() 17 | .setAppName("Main") 18 | .setMaster("local[*]") 19 | .set("spark.app.id", "Main") 20 | .set("spark.ui.enabled", "false") 21 | .set("spark.ui.showConsoleProgress", "false") 22 | .registerKryoClasses(Array()) 23 | 24 | val sc = new SparkContext(conf) 25 | try { 26 | /* 27 | sc.gdbFile("/Users/mraad_admin/Share/World.gdb", "Cities", 1) 28 | .map(row => { 29 | row.getAs[PointType](row.fieldIndex("Shape")).asGeometry 30 | }) 31 | .map(point => { 32 | (point.getX, point.getY) 33 | }) 34 | .foreach(println) 35 | */ 36 | 37 | val sqlContext = new SQLContext(sc) 38 | val df = sqlContext.read.format("com.esri.gdb") 39 | .option("path", path) 40 | .option("name", name) 41 | .option("numPartitions", "1") 42 | .load() 43 | df.printSchema() 44 | df.registerTempTable(name) 45 | sqlContext.udf.register("getX", (point: PointType) => point.x) 46 | sqlContext.udf.register("getY", (point: PointType) => point.y) 47 | sqlContext.udf.register("line", (point: PointType) => PolylineType({ 48 | val polyline = new Polyline() 49 | polyline.startPath(point.x - 2, point.y - 2) 50 | polyline.lineTo(point.x + 2, point.y + 2) 51 | polyline 52 | } 53 | )) 54 | sqlContext.sql(s"select line(Shape),getX(Shape)-2 as x from $name") 55 | .write 56 | .mode(SaveMode.Overwrite) 57 | .format("json") 58 | .save(s"/tmp/$name.json") 59 | } finally { 60 | sc.stop() 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/test/scala/com/esri/gdb/GDBSuite.scala: -------------------------------------------------------------------------------- 1 | package com.esri.gdb 2 | 3 | import com.esri.core.geometry.Envelope2D 4 | import com.esri.udt.{PointType, PolygonType, PolylineMType, PolylineType} 5 | import org.apache.spark.sql.{DataFrame, SQLContext} 6 | import org.apache.spark.{SparkConf, SparkContext} 7 | import org.joda.time.{DateTime, DateTimeZone} 8 | import org.scalatest.{BeforeAndAfterAll, FunSuite} 9 | 10 | /** 11 | */ 12 | class GDBSuite extends FunSuite with BeforeAndAfterAll { 13 | private var sc: SparkContext = _ 14 | private var sqlContext: SQLContext = _ 15 | 16 | override protected def beforeAll(): Unit = { 17 | super.beforeAll() 18 | val config = new SparkConf() 19 | .setMaster("local") 20 | .setAppName("GDBSuite") 21 | .set("spark.ui.enabled", "false") 22 | .set("spark.ui.showConsoleProgress", "false") 23 | sc = new SparkContext(config) 24 | sqlContext = new SQLContext(sc) 25 | } 26 | 27 | override protected def afterAll(): Unit = { 28 | try { 29 | sqlContext.sparkContext.stop() 30 | } finally { 31 | super.afterAll() 32 | } 33 | } 34 | 35 | private val gdbPath = "src/test/resources/Test.gdb" 36 | 37 | test("Points") { 38 | doPoints(sqlContext.gdbFile(gdbPath, "Points", 2)) 39 | } 40 | 41 | def doPoints(dataFrame: DataFrame): Unit = { 42 | val xyTolerance = 
dataFrame.schema("Shape").metadata.getDouble("xyTolerance") 43 | 44 | val results = dataFrame.select("Shape", "X", "Y", "RID", "OBJECTID") 45 | results.collect.foreach(row => { 46 | val point = row.getAs[PointType](0) 47 | val x = row.getDouble(1) 48 | val y = row.getDouble(2) 49 | val rid = row.getInt(3) 50 | val oid = row.getInt(4) 51 | assert((point.x - x).abs <= xyTolerance) 52 | assert((point.y - y).abs <= xyTolerance) 53 | assert(rid === oid) 54 | }) 55 | } 56 | 57 | test("Lines") { 58 | doLines(sqlContext.gdbFile(gdbPath, "Lines", 2)) 59 | } 60 | 61 | def doLines(dataFrame: DataFrame): Unit = { 62 | val xyTolerance = dataFrame.schema("Shape").metadata.getDouble("xyTolerance") 63 | 64 | val results = dataFrame.select("Shape", 65 | "X1", "Y1", 66 | "X2", "Y2", 67 | "X3", "Y3", 68 | "RID", 69 | "OBJECTID") 70 | results.collect.foreach(row => { 71 | val polyline = row.getAs[PolylineType](0) 72 | val x1 = row.getDouble(1) 73 | val y1 = row.getDouble(2) 74 | val x2 = row.getDouble(3) 75 | val y2 = row.getDouble(4) 76 | val x3 = row.getDouble(5) 77 | val y3 = row.getDouble(6) 78 | val rid = row.getInt(7) 79 | val oid = row.getInt(8) 80 | 81 | assert(polyline.xyNum.length === 1) 82 | assert(polyline.xyNum(0) === 3) 83 | 84 | assert(polyline.xyArr.length === 6) 85 | 86 | assert((polyline.xyArr(0) - x1).abs <= xyTolerance) 87 | assert((polyline.xyArr(1) - y1).abs <= xyTolerance) 88 | 89 | assert((polyline.xyArr(2) - x2).abs <= xyTolerance) 90 | assert((polyline.xyArr(3) - y2).abs <= xyTolerance) 91 | 92 | assert((polyline.xyArr(4) - x3).abs <= xyTolerance) 93 | assert((polyline.xyArr(5) - y3).abs <= xyTolerance) 94 | 95 | assert(rid === oid) 96 | }) 97 | } 98 | 99 | test("MLines") { 100 | doMLines(sqlContext.gdbFile(gdbPath, "MLines", 2)) 101 | } 102 | 103 | def doMLines(dataFrame: DataFrame): Unit = { 104 | val metadata = dataFrame.schema("Shape").metadata 105 | val xyTolerance = metadata.getDouble("xyTolerance") 106 | val mTolerance = metadata.getDouble("mTolerance") 107 | 108 | val results = dataFrame.select("Shape", 109 | "X1", "Y1", "M1", 110 | "X2", "Y2", "M2", 111 | "X3", "Y3", "M3", 112 | "RID", 113 | "OBJECTID") 114 | results.collect.foreach(row => { 115 | val polyline = row.getAs[PolylineMType](0) 116 | val x1 = row.getDouble(1) 117 | val y1 = row.getDouble(2) 118 | val m1 = row.getDouble(3) 119 | val x2 = row.getDouble(4) 120 | val y2 = row.getDouble(5) 121 | val m2 = row.getDouble(6) 122 | val x3 = row.getDouble(7) 123 | val y3 = row.getDouble(8) 124 | val m3 = row.getDouble(9) 125 | val rid = row.getInt(10) 126 | val oid = row.getInt(11) 127 | 128 | assert(rid === oid) 129 | 130 | assert(polyline.xyNum.length === 1) 131 | assert(polyline.xyNum(0) === 3) 132 | 133 | assert(polyline.xyArr.length === 9) 134 | 135 | assert((polyline.xyArr(0) - x1).abs <= xyTolerance) 136 | assert((polyline.xyArr(1) - y1).abs <= xyTolerance) 137 | assert((polyline.xyArr(2) - m1).abs <= mTolerance) 138 | 139 | assert((polyline.xyArr(3) - x2).abs <= xyTolerance) 140 | assert((polyline.xyArr(4) - y2).abs <= xyTolerance) 141 | assert((polyline.xyArr(5) - m2).abs <= mTolerance) 142 | 143 | assert((polyline.xyArr(6) - x3).abs <= xyTolerance) 144 | assert((polyline.xyArr(7) - y3).abs <= xyTolerance) 145 | assert((polyline.xyArr(8) - m3).abs <= mTolerance) 146 | }) 147 | } 148 | 149 | test("Polygons") { 150 | doPolygons(sqlContext.gdbFile(gdbPath, "Polygons", 2)) 151 | } 152 | 153 | def doPolygons(dataFrame: DataFrame): Unit = { 154 | val xyTolerance = 
dataFrame.schema("Shape").metadata.getDouble("xyTolerance") 155 | 156 | val results = dataFrame.select("Shape", 157 | "X1", "Y1", 158 | "X2", "Y2", 159 | "RID", 160 | "OBJECTID") 161 | results.collect.foreach(row => { 162 | val polygon = row.getAs[PolygonType](0) 163 | val x1 = row.getDouble(1) 164 | val y1 = row.getDouble(2) 165 | val x2 = row.getDouble(3) 166 | val y2 = row.getDouble(4) 167 | val rid = row.getInt(5) 168 | val oid = row.getInt(6) 169 | 170 | val envp = new Envelope2D() 171 | polygon.asGeometry.queryEnvelope2D(envp) 172 | 173 | assert((envp.xmin - x1).abs < xyTolerance) 174 | assert((envp.ymin - y1).abs < xyTolerance) 175 | 176 | assert((envp.xmax - x2).abs < xyTolerance) 177 | assert((envp.ymax - y2).abs < xyTolerance) 178 | 179 | assert(rid === oid) 180 | }) 181 | } 182 | 183 | test("DDL test") { 184 | sqlContext.sql( 185 | s""" 186 | |CREATE TEMPORARY TABLE points 187 | |USING com.esri.gdb 188 | |OPTIONS (path "$gdbPath", name "Points", numPartitions "1") 189 | """.stripMargin.replaceAll("\n", " ")) 190 | 191 | assert(sqlContext.sql("SELECT * FROM points").collect().length === 20) 192 | } 193 | 194 | test("Field names, aliases and values") { 195 | val dataframe = sqlContext.gdbFile(gdbPath, "Types", 2) 196 | val schema = dataframe.schema 197 | 198 | val fieldShape = schema("Shape") 199 | assert(fieldShape.name === "Shape") 200 | assert(fieldShape.metadata.getString("alias") === "Shape") 201 | 202 | val fieldAText = schema("A_TEXT") 203 | assert(fieldAText.name === "A_TEXT") 204 | assert(fieldAText.metadata.getString("alias") === "A Text") 205 | assert(fieldAText.metadata.getLong("maxLength") === 32) 206 | 207 | val fieldAFloat = schema("A_FLOAT") 208 | assert(fieldAFloat.name === "A_FLOAT") 209 | assert(fieldAFloat.metadata.getString("alias") === "A Float") 210 | 211 | val fieldADouble = schema("A_DOUBLE") 212 | assert(fieldADouble.name === "A_DOUBLE") 213 | assert(fieldADouble.metadata.getString("alias") === "A Double") 214 | 215 | val fieldAShort = schema("A_SHORT") 216 | assert(fieldAShort.name === "A_SHORT") 217 | assert(fieldAShort.metadata.getString("alias") === "A Short") 218 | 219 | val fieldALong = schema("A_LONG") 220 | assert(fieldALong.name === "A_LONG") 221 | assert(fieldALong.metadata.getString("alias") === "A Long") 222 | 223 | val fieldADate = schema("A_DATE") 224 | assert(fieldADate.name === "A_DATE") 225 | assert(fieldADate.metadata.getString("alias") === "A Date") 226 | 227 | val fieldAGuid = schema("A_GUID") 228 | assert(fieldAGuid.name === "A_GUID") 229 | assert(fieldAGuid.metadata.getString("alias") === "A GUID") 230 | 231 | val row = dataframe 232 | .select("Shape", "A_TEXT", "A_FLOAT", "A_DOUBLE", "A_SHORT", "A_LONG", "A_DATE", "A_GUID") 233 | .collect() 234 | .head 235 | 236 | val point = row.getAs[PointType](0) 237 | assert((point.x - 33.8869).abs < 0.00001) 238 | assert((point.y - 35.5131).abs < 0.00001) 239 | 240 | assert(row.getString(1) === "Beirut") 241 | 242 | assert((row.getFloat(2) - 33.8869).abs < 0.00001) 243 | assert((row.getDouble(3) - 35.5131).abs < 0.00001) 244 | 245 | assert(row.getShort(4) === 33) 246 | assert(row.getInt(5) === 35) 247 | 248 | val timestamp = row.getTimestamp(6) 249 | val datetime = new DateTime(timestamp.getTime, DateTimeZone.UTC) 250 | // 2016, 01, 01, 07, 24, 32 251 | assert(datetime.getYear === 2016) 252 | assert(datetime.getMonthOfYear === 1) 253 | assert(datetime.getDayOfMonth === 1) 254 | assert(datetime.getHourOfDay === 7) 255 | assert(datetime.getMinuteOfHour === 24) 256 | 
assert(datetime.getSecondOfMinute === 32) 257 | 258 | assert(row.getString(7) === "{2AA7D58D-2BF4-4943-83A8-457B70DB1871}") 259 | } 260 | 261 | } 262 | -------------------------------------------------------------------------------- /src/test/scala/com/esri/udt/SegmentIteratorSpec.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.scalatest._ 4 | 5 | /** 6 | */ 7 | class SegmentIteratorSpec extends FlatSpec with Matchers { 8 | 9 | it should "match no parts" in { 10 | val polylineM = PolylineMType(0.0, 0.0, 0.0, 0.0, Array.empty[Int], Array.empty[Double]) 11 | SegmentIterator(polylineM).hasNext shouldBe false 12 | } 13 | 14 | it should "match 0 parts" in { 15 | val polylineM = PolylineMType(0.0, 0.0, 0.0, 0.0, Array(0), Array.empty[Double]) 16 | SegmentIterator(polylineM).hasNext shouldBe false 17 | } 18 | 19 | it should "match 1 part with 1 segment" in { 20 | val polylineM = PolylineMType(0.0, 0.0, 0.0, 0.0, Array(2), Array(10, 20, 30, 20, 30, 40)) 21 | 22 | val segmentIterator = SegmentIterator(polylineM) 23 | segmentIterator.hasNext shouldBe true 24 | segmentIterator.next shouldBe Segment(10, 20, 30, 20, 30, 40) 25 | segmentIterator.hasNext shouldBe false 26 | } 27 | 28 | it should "match 1 part with 2 segments" in { 29 | val polylineM = PolylineMType(0.0, 0.0, 0.0, 0.0, Array(3), Array(10, 20, 30, 20, 30, 40, 30, 40, 50)) 30 | 31 | val segmentIterator = SegmentIterator(polylineM) 32 | segmentIterator.hasNext shouldBe true 33 | segmentIterator.next shouldBe Segment(10, 20, 30, 20, 30, 40) 34 | segmentIterator.hasNext shouldBe true 35 | segmentIterator.next shouldBe Segment(20, 30, 40, 30, 40, 50) 36 | segmentIterator.hasNext shouldBe false 37 | } 38 | 39 | it should "match 2 parts with 1 segment" in { 40 | 41 | val polylineM = new PolylineMType(0.0, 0.0, 0.0, 0.0, Array(2, 2), Array(10, 20, 30, 20, 30, 40, 20, 30, 40, 30, 40, 50)) 42 | 43 | val segmentIterator = SegmentIterator(polylineM) 44 | segmentIterator.hasNext shouldBe true 45 | segmentIterator.next() shouldBe Segment(10, 20, 30, 20, 30, 40) 46 | 47 | segmentIterator.hasNext shouldBe true 48 | segmentIterator.next() shouldBe Segment(20, 30, 40, 30, 40, 50) 49 | 50 | segmentIterator.hasNext shouldBe false 51 | } 52 | 53 | it should "match 2 parts with 3,2 segments" in { 54 | 55 | val polylineM = new PolylineMType(0.0, 0.0, 0.0, 0.0, 56 | Array(4, 3), 57 | Array( 58 | 10, 20, 30, 59 | 20, 30, 40, 60 | 30, 40, 50, 61 | 40, 50, 60, 62 | 20, 30, 40, 63 | 30, 40, 50, 64 | 40, 50, 60)) 65 | 66 | val segmentIterator = SegmentIterator(polylineM) 67 | 68 | segmentIterator.hasNext shouldBe true 69 | segmentIterator.next() shouldBe Segment(10, 20, 30, 20, 30, 40) 70 | 71 | segmentIterator.hasNext shouldBe true 72 | segmentIterator.next() shouldBe Segment(20, 30, 40, 30, 40, 50) 73 | 74 | segmentIterator.hasNext shouldBe true 75 | segmentIterator.next() shouldBe Segment(30, 40, 50, 40, 50, 60) 76 | 77 | segmentIterator.hasNext shouldBe true 78 | segmentIterator.next() shouldBe Segment(20, 30, 40, 30, 40, 50) 79 | 80 | segmentIterator.hasNext shouldBe true 81 | segmentIterator.next() shouldBe Segment(30, 40, 50, 40, 50, 60) 82 | 83 | segmentIterator.hasNext shouldBe false 84 | } 85 | 86 | it should "match 3 parts with 1,1,1 segments" in { 87 | 88 | val polylineM = new PolylineMType(0.0, 0.0, 0.0, 0.0, 89 | Array(2, 2, 2), 90 | Array( 91 | 10, 20, 30, 92 | 20, 30, 40, 93 | 30, 40, 50, 94 | 40, 50, 60, 95 | 20, 30, 40, 96 | 30, 40, 50)) 97 | 98 | val segmentIterator 
= SegmentIterator(polylineM) 99 | 100 | segmentIterator.hasNext shouldBe true 101 | segmentIterator.next() shouldBe Segment(10, 20, 30, 20, 30, 40) 102 | 103 | segmentIterator.hasNext shouldBe true 104 | segmentIterator.next() shouldBe Segment(30, 40, 50, 40, 50, 60) 105 | 106 | segmentIterator.hasNext shouldBe true 107 | segmentIterator.next() shouldBe Segment(20, 30, 40, 30, 40, 50) 108 | 109 | segmentIterator.hasNext shouldBe false 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/test/scala/com/esri/udt/SerDeSpec.scala: -------------------------------------------------------------------------------- 1 | package com.esri.udt 2 | 3 | import org.scalatest._ 4 | 5 | import scala.util.Random 6 | 7 | /** 8 | */ 9 | final class SerDeSpec extends FlatSpec with Matchers { 10 | 11 | val rand = new Random(System.currentTimeMillis()) 12 | 13 | it should "SerDe PointUDT" in { 14 | 15 | val x = rand.nextDouble 16 | val y = rand.nextDouble 17 | 18 | val udt = new PointUDT 19 | val p = udt.deserialize(udt.serialize(new PointType(x, y))) 20 | 21 | p match { 22 | case PointType(px, py) => { 23 | px shouldBe x 24 | py shouldBe y 25 | } 26 | } 27 | p should equal(new PointType(x, y)) 28 | } 29 | 30 | it should "SerDe PointZUDT" in { 31 | 32 | val x = rand.nextDouble 33 | val y = rand.nextDouble 34 | val z = rand.nextDouble 35 | 36 | val udt = new PointZUDT 37 | val p = udt.deserialize(udt.serialize(new PointZType(x, y, z))) 38 | 39 | p match { 40 | case PointZType(px, py, pz) => { 41 | px shouldBe x 42 | py shouldBe y 43 | pz shouldBe z 44 | } 45 | } 46 | p should equal(new PointZType(x, y, z)) 47 | } 48 | 49 | it should "SerDe PointMUDT" in { 50 | 51 | val x = rand.nextDouble 52 | val y = rand.nextDouble 53 | val m = rand.nextDouble 54 | 55 | val udt = new PointMUDT 56 | val p = udt.deserialize(udt.serialize(new PointMType(x, y, m))) 57 | 58 | p match { 59 | case PointMType(px, py, pm) => { 60 | px shouldBe x 61 | py shouldBe y 62 | pm shouldBe m 63 | } 64 | } 65 | p should equal(new PointMType(x, y, m)) 66 | } 67 | 68 | it should "SerDe PointZMUDT" in { 69 | 70 | val x = rand.nextDouble 71 | val y = rand.nextDouble 72 | val z = rand.nextDouble 73 | val m = rand.nextDouble 74 | 75 | val udt = new PointZMUDT 76 | val p = udt.deserialize(udt.serialize(new PointZMType(x, y, z, m))) 77 | 78 | p match { 79 | case PointZMType(px, py, pz, pm) => { 80 | px shouldBe x 81 | py shouldBe y 82 | pz shouldBe z 83 | pm shouldBe m 84 | } 85 | } 86 | p should equal(new PointZMType(x, y, z, m)) 87 | } 88 | 89 | it should "SerDe PolylineUDT" in { 90 | val xmin = rand.nextDouble 91 | val ymin = rand.nextDouble 92 | val xmax = rand.nextDouble 93 | val ymax = rand.nextDouble 94 | val udt = new PolylineUDT 95 | val p = udt.deserialize(udt.serialize(new PolylineType(xmin, ymin, xmax, ymax, Array(1), Array(xmin, ymin, xmax, ymax)))) 96 | 97 | p match { 98 | case PolylineType(pxmin, pymin, pxmax, pymax, pxyNum, pxyArr) => { 99 | pxmin shouldBe xmin 100 | pymin shouldBe ymin 101 | pxmax shouldBe xmax 102 | pymax shouldBe ymax 103 | pxyNum shouldBe Array(1) 104 | pxyArr shouldBe Array(xmin, ymin, xmax, ymax) 105 | } 106 | } 107 | p should equal(new PolylineType(xmin, ymin, xmax, ymax, Array(1), Array(xmin, ymin, xmax, ymax))) 108 | } 109 | 110 | it should "SerDe PolylineMUDT" in { 111 | val xmin = rand.nextDouble 112 | val ymin = rand.nextDouble 113 | val xmax = rand.nextDouble 114 | val ymax = rand.nextDouble 115 | val m1 = rand.nextDouble 116 | val m2 = rand.nextDouble 117 
| val udt = new PolylineMUDT 118 | val p = udt.deserialize(udt.serialize(new PolylineMType(xmin, ymin, xmax, ymax, Array(1), Array(xmin, ymin, m1, xmax, ymax, m2)))) 119 | 120 | p match { 121 | case PolylineMType(pxmin, pymin, pxmax, pymax, pxyNum, pxyArr) => { 122 | pxmin shouldBe xmin 123 | pymin shouldBe ymin 124 | pxmax shouldBe xmax 125 | pymax shouldBe ymax 126 | pxyNum shouldBe Array(1) 127 | pxyArr shouldBe Array(xmin, ymin, m1, xmax, ymax, m2) 128 | } 129 | } 130 | p should equal(new PolylineMType(xmin, ymin, xmax, ymax, Array(1), Array(xmin, ymin, m1, xmax, ymax, m2))) 131 | } 132 | 133 | it should "SerDe PolygonUDT" in { 134 | val xmin = rand.nextDouble 135 | val ymin = rand.nextDouble 136 | val xmax = rand.nextDouble 137 | val ymax = rand.nextDouble 138 | val udt = new PolygonUDT 139 | val p = udt.deserialize(udt.serialize(new PolygonType(xmin, ymin, xmax, ymax, Array(1), Array(xmin, ymin, xmax, ymax)))) 140 | 141 | p match { 142 | case PolygonType(pxmin, pymin, pxmax, pymax, pxyNum, pxyArr) => { 143 | pxmin shouldBe xmin 144 | pymin shouldBe ymin 145 | pxmax shouldBe xmax 146 | pymax shouldBe ymax 147 | pxyNum shouldBe Array(1) 148 | pxyArr shouldBe Array(xmin, ymin, xmax, ymax) 149 | } 150 | } 151 | p should equal(new PolygonType(xmin, ymin, xmax, ymax, Array(1), Array(xmin, ymin, xmax, ymax))) 152 | } 153 | } 154 | --------------------------------------------------------------------------------
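Usage sketch (hypothetical, not a file in this repository): the test sources above drive the connector through the com.esri.gdb data source and the sqlContext.gdbFile implicit. A minimal standalone driver, assuming the Spark 1.x SQLContext API used in Main.scala and GDBSuite.scala and the spark-gdb jar on the classpath, could look like the following; the object name QuickStart and the local master setting are illustrative assumptions.

import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

object QuickStart extends App {
  // Local Spark context; in a real deployment the master would come from spark-submit.
  val sc = new SparkContext(new SparkConf().setAppName("QuickStart").setMaster("local[*]"))
  try {
    val sqlContext = new SQLContext(sc)
    // Read the bundled test geodatabase through the data source registered as "com.esri.gdb",
    // using the same options (path, name, numPartitions) exercised in the tests.
    val df = sqlContext.read
      .format("com.esri.gdb")
      .option("path", "src/test/resources/Test.gdb") // path to the .gdb directory
      .option("name", "Points")                      // feature class / table name
      .option("numPartitions", "2")
      .load()
    df.printSchema()
    df.show()
  } finally {
    sc.stop()
  }
}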