├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── SECURITY.md ├── build.sbt ├── build ├── sbt └── sbt-launch-lib.bash ├── project ├── build.properties └── plugins.sbt ├── python ├── MANIFEST.in ├── requirements.txt ├── setup.cfg ├── setup.py ├── spark-package-deps.txt ├── sparkimages │ ├── ImageSchema.py │ └── __init__.py └── tests.py └── src ├── main └── scala │ └── org │ └── apache │ └── spark │ └── image │ ├── HadoopUtils.scala │ └── ImageSchema.scala └── test ├── resources └── images │ ├── CIFAR10-sample │ ├── 1006.png │ ├── 10064.png │ ├── 10071.png │ ├── 10328.png │ ├── 1052.png │ ├── 10660.png │ ├── 10661.png │ ├── 10748.png │ ├── 10787.png │ ├── 10815.png │ ├── 10822.png │ ├── 11112.png │ ├── 11210.png │ ├── 11240.png │ ├── 11248.png │ ├── 11430.png │ ├── 11589.png │ ├── 11688.png │ ├── 11731.png │ ├── 11872.png │ ├── 1201.png │ ├── 12172.png │ ├── 12435.png │ ├── 1249.png │ ├── 1252.png │ ├── 12940.png │ ├── 13078.png │ ├── 13144.png │ ├── 13163.png │ ├── 13208.png │ ├── 13345.png │ ├── 13355.png │ ├── 13358.png │ ├── 13411.png │ ├── 13485.png │ ├── 13606.png │ ├── 13727.png │ ├── 13758.png │ ├── 13805.png │ ├── 13940.png │ ├── 13950.png │ ├── 14150.png │ ├── 1422.png │ ├── 14459.png │ ├── 14513.png │ ├── 14515.png │ ├── 14629.png │ ├── 14680.png │ ├── 14695.png │ ├── 14829.png │ ├── 15027.png │ ├── 15274.png │ ├── 15303.png │ ├── 1536.png │ ├── 15424.png │ ├── 15493.png │ ├── 15593.png │ ├── 15601.png │ ├── 15658.png │ ├── 15670.png │ ├── 15786.png │ ├── 15848.png │ ├── 15946.png │ ├── 15992.png │ ├── 16014.png │ ├── 16348.png │ ├── 16508.png │ ├── 17273.png │ ├── 17390.png │ ├── 17435.png │ ├── 17647.png │ ├── 17739.png │ ├── 17909.png │ ├── 18104.png │ ├── 18173.png │ ├── 18218.png │ ├── 18261.png │ ├── 1828.png │ ├── 18343.png │ ├── 18371.png │ ├── 18381.png │ ├── 18611.png │ ├── 18657.png │ ├── 1877.png │ ├── 18810.png │ ├── 18937.png │ ├── 18979.png │ ├── 19185.png │ ├── 19509.png │ ├── 19563.png │ ├── 19927.png │ ├── 19967.png │ ├── 
19971.png │ ├── 20093.png │ ├── 201.png │ ├── 20311.png │ ├── 20382.png │ ├── 2051.png │ ├── 20542.png │ ├── 20591.png │ └── not-image.txt │ └── multi-channel │ ├── BGR.png │ ├── BGRA.png │ └── grayscale.png └── scala └── org └── apache └── spark └── image ├── TestImageSchema.scala └── TestSparkContext.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | *.jar 4 | __pycache__ 5 | target 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: scala 2 | scala: 3 | - 2.11.8 4 | dist: xenial 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/Microsoft/spark-images.svg?branch=master)](https://travis-ci.org/Microsoft/spark-images) 2 | 3 | # Spark Images 4 | This repository contains an implementation of schema for representing images inside Spark SQL ``DataFrame``s. 5 | 6 | This work has been contributed to [Apache Spark Release 2.3](https://spark.apache.org/releases/spark-release-2-3-0.html). 
7 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. 
buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | // Your sbt build file. Guides on how to write one can be found at 2 | // http://www.scala-sbt.org/0.13/docs/index.html 3 | 4 | scalaVersion := "2.11.8" 5 | 6 | sparkVersion := "2.2.0" 7 | 8 | spName := "microsoft/spark-images" 9 | 10 | // Don't forget to set the version 11 | version := "0.1" 12 | 13 | // All Spark Packages need a license 14 | licenses := Seq("Apache-2.0" -> url("http://opensource.org/licenses/Apache-2.0")) 15 | 16 | 17 | // Add Spark components this package depends on, e.g, "mllib", .... 
18 | sparkComponents ++= Seq("sql") 19 | 20 | libraryDependencies ++= Seq( 21 | // "%%" for scala things, "%" for plain java things 22 | "org.scalatest" %% "scalatest" % "3.0.0" % "provided" 23 | ) 24 | 25 | // uncomment and change the value below to change the directory where your zip artifact will be created 26 | // spDistDirectory := target.value 27 | 28 | // add any Spark Package dependencies using spDependencies. 29 | // e.g. spDependencies += "databricks/spark-avro:0.1" 30 | -------------------------------------------------------------------------------- /build/sbt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so 4 | # that we can run Hive to generate the golden answer. This is not required for normal development 5 | # or testing. 6 | for i in $HIVE_HOME/lib/* 7 | do HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$i 8 | done 9 | export HADOOP_CLASSPATH 10 | 11 | realpath () { 12 | ( 13 | TARGET_FILE=$1 14 | 15 | cd $(dirname $TARGET_FILE) 16 | TARGET_FILE=$(basename $TARGET_FILE) 17 | 18 | COUNT=0 19 | while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] 20 | do 21 | TARGET_FILE=$(readlink $TARGET_FILE) 22 | cd $(dirname $TARGET_FILE) 23 | TARGET_FILE=$(basename $TARGET_FILE) 24 | COUNT=$(($COUNT + 1)) 25 | done 26 | 27 | echo $(pwd -P)/$TARGET_FILE 28 | ) 29 | } 30 | 31 | . 
$(dirname $(realpath $0))/sbt-launch-lib.bash 32 | 33 | 34 | declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" 35 | declare -r sbt_opts_file=".sbtopts" 36 | declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" 37 | 38 | usage() { 39 | cat < path to global settings/plugins directory (default: ~/.sbt) 47 | -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) 48 | -ivy path to local Ivy repository (default: ~/.ivy2) 49 | -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) 50 | -no-share use all local caches; no sharing 51 | -no-global uses global caches, but does not use global ~/.sbt directory. 52 | -jvm-debug Turn on JVM debugging, open at the given port. 53 | -batch Disable interactive mode 54 | # sbt version (default: from project/build.properties if present, else latest release) 55 | -sbt-version use the specified version of sbt 56 | -sbt-jar use the specified jar as the sbt launcher 57 | -sbt-rc use an RC version of sbt 58 | -sbt-snapshot use a snapshot version of sbt 59 | # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) 60 | -java-home alternate JAVA_HOME 61 | # jvm options and output control 62 | JAVA_OPTS environment variable, if unset uses "$java_opts" 63 | SBT_OPTS environment variable, if unset uses "$default_sbt_opts" 64 | .sbtopts if this file exists in the current directory, it is 65 | prepended to the runner args 66 | /etc/sbt/sbtopts if this file exists, it is prepended to the runner args 67 | -Dkey=val pass -Dkey=val directly to the java runtime 68 | -J-X pass option -X directly to the java runtime 69 | (-J is stripped) 70 | -S-X add -X to sbt's scalacOptions (-J is stripped) 71 | -PmavenProfiles Enable a maven profile for the build. 72 | In the case of duplicated or conflicting options, the order above 73 | shows precedence: JAVA_OPTS lowest, command line options highest. 
74 | EOM 75 | } 76 | 77 | process_my_args () { 78 | while [[ $# -gt 0 ]]; do 79 | case "$1" in 80 | -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; 81 | -no-share) addJava "$noshare_opts" && shift ;; 82 | -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; 83 | -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; 84 | -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; 85 | -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; 86 | -batch) exec &2 "$@" 31 | } 32 | vlog () { 33 | [[ $verbose || $debug ]] && echoerr "$@" 34 | } 35 | dlog () { 36 | [[ $debug ]] && echoerr "$@" 37 | } 38 | 39 | acquire_sbt_jar () { 40 | SBT_VERSION=`awk -F "=" '/sbt\\.version/ {print $2}' ./project/build.properties` 41 | URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar 42 | JAR=build/sbt-launch-${SBT_VERSION}.jar 43 | 44 | sbt_jar=$JAR 45 | 46 | if [[ ! -f "$sbt_jar" ]]; then 47 | # Download sbt launch jar if it hasn't been downloaded yet 48 | if [ ! -f ${JAR} ]; then 49 | # Download 50 | printf "Attempting to fetch sbt\n" 51 | JAR_DL=${JAR}.part 52 | if hash curl 2>/dev/null; then 53 | curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\ 54 | mv "${JAR_DL}" "${JAR}" 55 | elif hash wget 2>/dev/null; then 56 | wget --quiet ${URL1} -O "${JAR_DL}" &&\ 57 | mv "${JAR_DL}" "${JAR}" 58 | else 59 | printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" 60 | exit -1 61 | fi 62 | fi 63 | if [ ! -f ${JAR} ]; then 64 | # We failed to download 65 | printf "Our attempt to download sbt locally to ${JAR} failed. 
Please install sbt manually from http://www.scala-sbt.org/\n" 66 | exit -1 67 | fi 68 | printf "Launching sbt from ${JAR}\n" 69 | fi 70 | } 71 | 72 | execRunner () { 73 | # print the arguments one to a line, quoting any containing spaces 74 | [[ $verbose || $debug ]] && echo "# Executing command line:" && { 75 | for arg; do 76 | if printf "%s\n" "$arg" | grep -q ' '; then 77 | printf "\"%s\"\n" "$arg" 78 | else 79 | printf "%s\n" "$arg" 80 | fi 81 | done 82 | echo "" 83 | } 84 | 85 | exec "$@" 86 | } 87 | 88 | addJava () { 89 | dlog "[addJava] arg = '$1'" 90 | java_args=( "${java_args[@]}" "$1" ) 91 | } 92 | 93 | enableProfile () { 94 | dlog "[enableProfile] arg = '$1'" 95 | maven_profiles=( "${maven_profiles[@]}" "$1" ) 96 | export SBT_MAVEN_PROFILES="${maven_profiles[@]}" 97 | } 98 | 99 | addSbt () { 100 | dlog "[addSbt] arg = '$1'" 101 | sbt_commands=( "${sbt_commands[@]}" "$1" ) 102 | } 103 | addResidual () { 104 | dlog "[residual] arg = '$1'" 105 | residual_args=( "${residual_args[@]}" "$1" ) 106 | } 107 | addDebugger () { 108 | addJava "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=$1" 109 | } 110 | 111 | # a ham-fisted attempt to move some memory settings in concert 112 | # so they need not be dicked around with individually. 
113 | get_mem_opts () { 114 | local mem=${1:-2048} 115 | local perm=$(( $mem / 4 )) 116 | (( $perm > 256 )) || perm=256 117 | (( $perm < 4096 )) || perm=4096 118 | local codecache=$(( $perm / 2 )) 119 | 120 | echo "-Xms${mem}m -Xmx${mem}m -XX:MaxPermSize=${perm}m -XX:ReservedCodeCacheSize=${codecache}m" 121 | } 122 | 123 | require_arg () { 124 | local type="$1" 125 | local opt="$2" 126 | local arg="$3" 127 | if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then 128 | die "$opt requires <$type> argument" 129 | fi 130 | } 131 | 132 | is_function_defined() { 133 | declare -f "$1" > /dev/null 134 | } 135 | 136 | process_args () { 137 | while [[ $# -gt 0 ]]; do 138 | case "$1" in 139 | -h|-help) usage; exit 1 ;; 140 | -v|-verbose) verbose=1 && shift ;; 141 | -d|-debug) debug=1 && shift ;; 142 | 143 | -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; 144 | -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; 145 | -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; 146 | -batch) exec hadoopConf.set(flagName, v) 30 | case None => hadoopConf.unset(flagName) 31 | } 32 | 33 | old 34 | } 35 | } 36 | 37 | 38 | /** Filter that allows loading a fraction of HDFS files. 
/** Hadoop path filter that keeps only a sampled fraction of the files it is shown.
 *
 * Paths that do not look like files (see [[SamplePathFilter.isFile]]) are always accepted.
 * The generator is seeded with 0, so a given instance draws the same sequence on every run.
 */
class SamplePathFilter extends Configured with PathFilter {
  // Fixed seed: sampling decisions follow the same pseudo-random sequence each time
  val random: Random = new Random(0L)

  // Fraction of files that should pass the filter
  var sampleRatio: Double = 1

  /** Pick up the sampling ratio from the configuration; a null configuration is ignored. */
  override def setConf(conf: Configuration): Unit =
    Option(conf).foreach { c =>
      sampleRatio = c.getDouble(SamplePathFilter.ratioParam, 1)
    }

  /** Accept every non-file path; accept a file when the next random draw is below the ratio. */
  override def accept(path: Path): Boolean =
    // Asking the file system whether the path is a directory is very slow,
    // so the decision relies on the file-name heuristic instead.
    if (SamplePathFilter.isFile(path)) random.nextDouble() < sampleRatio
    else true
}

object SamplePathFilter {
  // Configuration key under which the sampling ratio is stored
  val ratioParam = "sampleRatio"

  /** A path is treated as a file when its name carries a non-empty extension. */
  def isFile(path: Path): Boolean = {
    val extension = FilenameUtils.getExtension(path.toString)
    extension != ""
  }

  /** Install or remove the HDFS path filter and its sampling ratio.
   *
   * @param value       Filter class to register, or None to unset the filter
   * @param sampleRatio Fraction of the files the filter should pick, or None to unset the ratio
   * @param spark       Existing Spark session whose Hadoop configuration is modified
   * @return Filter class that was registered before this call, if any
   */
  def setPathFilter(value: Option[Class[_]], sampleRatio: Option[Double] = None, spark: SparkSession)
  : Option[Class[_]] = {
    val filterKey = FileInputFormat.PATHFILTER_CLASS
    val conf = spark.sparkContext.hadoopConfiguration

    // Remember the current filter before anything is overwritten
    val previous = Option(conf.getClass(filterKey, null))

    sampleRatio match {
      case Some(ratio) => conf.setDouble(SamplePathFilter.ratioParam, ratio)
      case None => conf.unset(SamplePathFilter.ratioParam)
    }

    value.fold(conf.unset(filterKey)) { filterClass =>
      conf.setClass(filterKey, filterClass, classOf[PathFilter])
    }

    previous
  }
}
org.apache.spark.sql.{DataFrame, Row, SparkSession} 4 | import org.apache.spark.input.PortableDataStream 5 | import org.apache.spark.sql.types._ 6 | import org.apache.spark.rdd.RDD 7 | 8 | import java.awt.image.BufferedImage 9 | import java.awt.{Color, Image} 10 | import java.io.ByteArrayInputStream 11 | import javax.imageio.ImageIO 12 | import java.awt.color.ColorSpace 13 | 14 | object ImageSchema{ 15 | 16 | val undefinedImageType = "Undefined" 17 | 18 | val ocvTypes = Map( 19 | undefinedImageType -> -1, 20 | "CV_8U" -> 0, "CV_8UC1" -> 0, "CV_8UC2" -> 8, "CV_8UC3" -> 16, "CV_8UC4" -> 24, 21 | "CV_8S" -> 1, "CV_8SC1" -> 1, "CV_8SC2" -> 9, "CV_8SC3" -> 17, "CV_8SC4" -> 25, 22 | "CV_16U"-> 2, "CV_16UC1"-> 2, "CV_16UC2"->10, "CV_16UC3"-> 18, "CV_16UC4"-> 26, 23 | "CV_16S"-> 3, "CV_16SC1"-> 3, "CV_16SC2"->11, "CV_16SC3"-> 19, "CV_16SC4"-> 27, 24 | "CV_32S"-> 4, "CV_32SC1"-> 4, "CV_32SC2"->12, "CV_32SC3"-> 20, "CV_32SC4"-> 28, 25 | "CV_32F"-> 5, "CV_32FC1"-> 5, "CV_32FC2"->13, "CV_32FC3"-> 21, "CV_32FC4"-> 29, 26 | "CV_64F"-> 6, "CV_64FC1"-> 6, "CV_64FC2"->14, "CV_64FC3"-> 22, "CV_64FC4"-> 30 27 | ) 28 | 29 | /** Schema for the image column: Row(String, Int, Int, Int, Array[Byte]) */ 30 | val columnSchema = StructType( 31 | StructField("origin", StringType, true) :: 32 | StructField("height", IntegerType, false) :: 33 | StructField("width", IntegerType, false) :: 34 | StructField("nChannels", IntegerType, false) :: 35 | StructField("mode", StringType, false) :: //OpenCV-compatible type: CV_8UC3 in most cases 36 | StructField("data", BinaryType, false) :: Nil) //bytes in OpenCV-compatible order: row-wise BGR in most cases 37 | 38 | //dataframe with a single column of images named "image" (nullable) 39 | private val imageDFSchema = StructType(StructField("image", columnSchema, true) :: Nil) 40 | 41 | def getOrigin(row: Row): String = row.getString(0) 42 | def getHeight(row: Row): Int = row.getInt(1) 43 | def getWidth(row: Row): Int = row.getInt(2) 44 | def 
/** Decode a compressed image (jpeg, png, etc.) into the OpenCV-compatible representation
 * and wrap it in a dataframe Row.
 *
 * Pixels are written row by row. Grayscale images yield one byte per pixel ("CV_8UC1");
 * all other images are stored as BGR ("CV_8UC3"), or as BGRA ("CV_8UC4") when the colour
 * model reports an alpha channel.
 *
 * @param origin Arbitrary string that identifies the image
 * @param bytes  Image bytes (for example, jpeg)
 * @return Some(Row(Row(origin, height, width, nChannels, mode, data))), or None when
 *         ImageIO.read returns null for the given bytes
 */
private[spark] def decode(origin: String, bytes: Array[Byte]): Option[Row] =
  Option(ImageIO.read(new ByteArrayInputStream(bytes))).map { img =>
    val colorModel = img.getColorModel
    val isGray = colorModel.getColorSpace.getType == ColorSpace.TYPE_GRAY
    val hasAlpha = colorModel.hasAlpha

    val height = img.getHeight
    val width = img.getWidth
    val (nChannels, mode) =
      if (isGray) (1, "CV_8UC1")
      else if (hasAlpha) (4, "CV_8UC4")
      else (3, "CV_8UC3")

    // Compute the size as Long: an Int product can wrap around for very large images,
    // slip past the limit check and then fail while allocating the array.
    val imageSize = height.toLong * width * nChannels
    assert(imageSize < 1e9, "image is too large")
    val decoded = Array.ofDim[Byte](imageSize.toInt)

    if (isGray) {
      // grayscale images in Java require special handling to get the correct intensity:
      // read band 0 of the raster directly instead of going through getRGB
      val raster = img.getRaster
      var offset = 0
      for (h <- 0 until height; w <- 0 until width) {
        decoded(offset) = raster.getSample(w, h, 0).toByte
        offset += 1
      }
    } else {
      var offset = 0
      for (h <- 0 until height; w <- 0 until width) {
        // The two-argument constructor keeps the alpha bits of the packed ARGB value;
        // Color(int) would produce an opaque colour and every alpha byte would be 255.
        val color = new Color(img.getRGB(w, h), hasAlpha)

        decoded(offset) = color.getBlue.toByte
        decoded(offset + 1) = color.getGreen.toByte
        decoded(offset + 2) = color.getRed.toByte
        if (nChannels == 4) {
          decoded(offset + 3) = color.getAlpha.toByte
        }
        offset += nChannels
      }
    }

    // the internal "Row" is needed, because the image is a single dataframe column
    Row(Row(origin, height, width, nChannels, mode, decoded))
  }
147 | SamplePathFilter.setPathFilter(Some(classOf[SamplePathFilter]), Some(sampleRatio), session) 148 | else 149 | None 150 | 151 | var result: DataFrame = null 152 | try { 153 | val streams = session.sparkContext.binaryFiles(path, partitions) 154 | .repartition(partitions) 155 | 156 | val images = if(dropImageFailures){ 157 | streams.flatMap{ 158 | case (origin, stream) => decode(origin, stream.toArray) 159 | } 160 | } 161 | else { 162 | streams.map{ 163 | case (origin, stream) => decode(origin, stream.toArray).getOrElse(invalidImageRow(origin)) 164 | } 165 | } 166 | 167 | result = session.createDataFrame(images, imageDFSchema) 168 | } 169 | finally { 170 | // return Hadoop flags to the original values 171 | RecursiveFlag.setRecursiveFlag(oldRecursiveFlag, session) 172 | SamplePathFilter.setPathFilter(oldPathFilter, None, session) 173 | () 174 | } 175 | 176 | result 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1006.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10064.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10064.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10071.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10071.png 
-------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10328.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10328.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1052.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1052.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10660.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10660.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10661.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10661.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10748.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10748.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10787.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10787.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10815.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10815.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/10822.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/10822.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11112.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11112.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11210.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11210.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11240.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11240.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11248.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11248.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11430.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11430.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11589.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11589.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11688.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11688.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11731.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11731.png 
-------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/11872.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/11872.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1201.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1201.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/12172.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/12172.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/12435.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/12435.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1249.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1249.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1252.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1252.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/12940.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/12940.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13078.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13078.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13144.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13163.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13163.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13208.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13208.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13345.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13345.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13355.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13355.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13358.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13358.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13411.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13411.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13485.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13485.png 
-------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13606.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13606.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13727.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13727.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13758.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13758.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13805.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13805.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13940.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13940.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/13950.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/13950.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/14150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/14150.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1422.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1422.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/14459.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/14459.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/14513.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/14513.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/14515.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/14515.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/14629.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/14629.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/14680.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/14680.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/14695.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/14695.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/14829.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/14829.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15027.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15027.png 
-------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15274.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15274.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15303.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15303.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1536.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1536.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15424.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15424.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15493.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15493.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15593.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15593.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15601.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15601.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15658.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15658.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15670.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15670.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15786.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15786.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15848.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15848.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15946.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15946.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/15992.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/15992.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/16014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/16014.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/16348.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/16348.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/16508.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/16508.png 
-------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/17273.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/17273.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/17390.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/17390.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/17435.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/17435.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/17647.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/17647.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/17739.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/17739.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/17909.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/17909.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18104.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18104.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18173.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18173.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18218.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18218.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18261.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18261.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1828.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1828.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18343.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18343.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18371.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18371.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18381.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18381.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18611.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18611.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18657.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18657.png 
-------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/1877.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/1877.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18810.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18810.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18937.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18937.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/18979.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/18979.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/19185.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/19185.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/19509.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/19509.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/19563.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/19563.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/19927.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/19927.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/19967.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/19967.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/19971.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/19971.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/20093.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/20093.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/201.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/201.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/20311.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/20311.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/20382.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/20382.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/2051.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/2051.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/20542.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/20542.png 
-------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/20591.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/CIFAR10-sample/20591.png -------------------------------------------------------------------------------- /src/test/resources/images/CIFAR10-sample/not-image.txt: -------------------------------------------------------------------------------- 1 | not an image 2 | -------------------------------------------------------------------------------- /src/test/resources/images/multi-channel/BGR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/multi-channel/BGR.png -------------------------------------------------------------------------------- /src/test/resources/images/multi-channel/BGRA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/multi-channel/BGRA.png -------------------------------------------------------------------------------- /src/test/resources/images/multi-channel/grayscale.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/spark-images/8de579d19f292e5e43bbd7a752290ba3dc8a1cf0/src/test/resources/images/multi-channel/grayscale.png
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/image/TestImageSchema.scala:
--------------------------------------------------------------------------------
package org.apache.spark.image

import org.scalatest.FunSuite
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, DataFrame, SQLContext, SparkSession}
import org.apache.spark.rdd.RDD
import org.apache.spark.image.ImageSchema._
import java.nio.file.Paths

/**
 * Tests for `ImageSchema`: building an image DataFrame by hand and reading the
 * fixtures found under the `/images` test resource directory with `readImages`.
 */
class TestImageSchemaSuite extends FunSuite with TestSparkContext {

  // Single column of images named "image".
  private val imageDFSchema = StructType(StructField("image", ImageSchema.columnSchema, true) :: Nil)
  private lazy val imagePath = getClass.getResource("/images").getPath

  test("Smoke test: create basic Spark dataframe") {
    val df = spark.createDataFrame(Seq((0, 0.0)))
    assert(df.count == 1)
  }

  test("Smoke test: create basic ImageSchema dataframe") {
    val origin = "path"
    val width = 1
    val height = 1
    val nChannels = 3
    val data = Array[Byte](0, 0, 0)
    val mode = "CV_8UC3"

    // Each inner Row corresponds to the image StructType; the second one has a null origin.
    val rows = Seq(
      Row(Row(origin, height, width, nChannels, mode, data)),
      Row(Row(null, height, width, nChannels, mode, data)))
    val rdd = sc.makeRDD(rows)
    val df = spark.createDataFrame(rdd, imageDFSchema)

    assert(df.count == 2, "incorrect image count")
    assert(ImageSchema.isImageColumn(df, "image"), "data do not fit ImageSchema")
  }

  test("readImages count test") {
    // The fixtures live in sub-directories, so a non-recursive read finds nothing.
    val topLevelOnly = readImages(imagePath, recursive = false)
    assert(topLevelOnly.count == 0)

    val withFailures = readImages(imagePath, recursive = true, dropImageFailures = false)
    assert(withFailures.count == 104)

    val validOnly = readImages(imagePath, recursive = true, dropImageFailures = true)
    val count100 = validOnly.count
    assert(count100 == 103)

    val sampled = readImages(imagePath, recursive = true, sampleRatio = 0.5, dropImageFailures = true)
    val count50 = sampled.count // random number about half of the size of the original dataset
    assert(count50 > 0.2 * count100 && count50 < 0.8 * count100)
  }

  test("readImages partition test") {
    val df = readImages(imagePath, recursive = true, dropImageFailures = true, numPartitions = 3)
    assert(df.rdd.getNumPartitions == 3)
  }

  // Images with different numbers of channels.
  test("readImages pixel values test") {
    val images = readImages(imagePath + "/multi-channel/", recursive = false).collect

    // Index the image rows by the file name taken from their origin.
    val rowsByFile = images.map { outer =>
      val row = outer.getAs[Row](0)
      Paths.get(getOrigin(row)).getFileName().toString() -> row
    }.toMap

    // Fail loudly when an expectation has no matching image, so that a renamed
    // fixture cannot silently skip its check.
    val missing = firstBytes20.keySet -- rowsByFile.keySet
    assert(missing.isEmpty, "expected images were not read: " + missing.mkString(", "))

    firstBytes20.foreach { case (filename, (expectedMode, expectedBytes)) =>
      val row = rowsByFile(filename)
      val mode = getMode(row)
      val bytes20 = getData(row).slice(0, 20)

      assert(expectedMode == mode, "mode of the image is not read correctly")
      assert(
        compareBytes(expectedBytes, bytes20),
        "incorrect numeric value for flattened image " + filename +
          "; result: " + bytes20.mkString("[", ", ", "]") +
          "; expected: " + expectedBytes.mkString("[", ", ", "]"))
    }
  }

  // Expected mode and first 20 bytes of the OpenCV representation, keyed by fixture file name.
  // The keys must match the file names under multi-channel/ exactly.
  // - default representation for 3-channel RGB images is BGR row-wise: (B00, G00, R00, B10, G10, R10, ...)
  // - default representation for 4-channel RGB images is BGRA row-wise: (B00, G00, R00, A00, B10, G10, R10, A10, ...)
  private val firstBytes20 = Map(
    "grayscale.png" -> (("CV_8UC1", Array[Byte](0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 3, 5, 2, 1))),
    "BGR.png" -> (("CV_8UC3", Array[Byte](-34, -66, -98, -38, -69, -98, -62, -90, -117, -70, -98, -124, -34, -63, -90, -20, -48, -74, -18, -45))),
    "BGRA.png" -> (("CV_8UC4", Array[Byte](-128, -128, -8, -1, -128, -128, -8, -1, -128, -128, -8, -1, 127, 127, -9, -1, 127, 127, -9, -1)))
  )

  /**
   * Returns true only when both arrays have the same length and the same bytes
   * in the same order; a shorter (or empty) array never matches a longer one.
   */
  private def compareBytes(x: Array[Byte], y: Array[Byte]): Boolean =
    java.util.Arrays.equals(x, y)
}
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/image/TestSparkContext.scala:
--------------------------------------------------------------------------------
package org.apache.spark.image

import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, DataFrame, SQLContext, SparkSession}

import scala.reflect.runtime.universe._
import org.scalatest.{FunSuite, BeforeAndAfterAll}

/**
 * Spark fixture mixed into the test suites of this project.
 *
 * `spark` is created lazily on first use; `sc` and `sqlContext` are assigned in
 * `beforeAll` and cleared in `afterAll`, so they are null outside of a suite run.
 */
trait TestSparkContext extends BeforeAndAfterAll { self: FunSuite =>
  @transient var sc: SparkContext = _
  @transient var sqlContext: SQLContext = _
  @transient lazy val spark: SparkSession = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("Spark-Image-Test")
      .set("spark.ui.port", "4079")
      .set("spark.sql.shuffle.partitions", "4") // makes small tests much faster

    val sess = SparkSession.builder().config(conf).getOrCreate()
    sess.sparkContext.setLogLevel("WARN")
    sess
  }

  /** Forces creation of the session and exposes its contexts to the suite. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    sc = spark.sparkContext
    sqlContext = spark.sqlContext
  }

  /** Stops the Spark context and clears the fixture fields. */
  override def afterAll(): Unit = {
    try {
      if (sc != null) {
        sc.stop()
      }
    } finally {
      // Always reset the fields and run the parent hook, even if stop() throws.
      sqlContext = null
      sc = null
      super.afterAll()
    }
  }

  /** Builds a single-column DataFrame named `col` from the values in `xs`. */
  def makeDF[T: TypeTag](xs: Seq[T], col: String): DataFrame = {
    sqlContext.createDataFrame(xs.map(Tuple1.apply)).toDF(col)
  }

  /** Asserts that both row collections are equal after sorting them by their string form. */
  def compareRows(r1: Array[Row], r2: Seq[Row]): Unit = {
    val a = r1.sortBy(_.toString())
    val b = r2.sortBy(_.toString())
    assert(a === b)
  }
}
--------------------------------------------------------------------------------