├── .gitignore ├── LICENSE ├── README.md ├── RELEASE-NOTES.md ├── build.gradle.kts ├── gradle.properties ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle.kts └── src ├── jmh └── java │ └── com │ └── palominolabs │ └── http │ └── url │ ├── PercentDecoderBenchmark.java │ ├── PercentEncoderBenchmark.java │ ├── URLDecoderBenchmark.java │ └── URLEncoderBenchmark.java ├── main └── java │ └── com │ └── palominolabs │ └── http │ └── url │ ├── PercentDecoder.java │ ├── PercentEncoder.java │ ├── PercentEncoderOutputHandler.java │ ├── StringBuilderPercentEncoderOutputHandler.java │ ├── UrlBuilder.java │ └── UrlPercentEncoders.java └── test ├── java └── com │ └── palominolabs │ └── http │ └── url │ ├── PercentEncoderTest.java │ └── UrlBuilderTest.java └── kotlin └── com └── palominolabs └── http └── url └── PercentDecoderTest.kt /.gitignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | *.ipr 3 | *.iws 4 | *.swp 5 | ./out 6 | .DS_Store 7 | .directory 8 | .gradle 9 | .idea 10 | build 11 | target 12 | out 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # Copyfree Open Innovation License 2 | 3 | This is version 1.0 of the Copyfree Open Innovation License. 4 | 5 | ## Terms and Conditions 6 | 7 | Redistributions, modified or unmodified, in whole or in part, must retain 8 | applicable notices of copyright or other legal privilege, these conditions, and 9 | the following license terms and disclaimer. Subject to these conditions, each 10 | holder of copyright or other legal privileges, author or assembler, and 11 | contributor of this work, henceforth "licensor", hereby grants to any person 12 | who obtains a copy of this work in any form: 13 | 14 | 1. 
Permission to reproduce, modify, distribute, publish, sell, sublicense, use, 15 | and/or otherwise deal in the licensed material without restriction. 16 | 17 | 2. A perpetual, worldwide, non-exclusive, royalty-free, gratis, irrevocable 18 | patent license to make, have made, provide, transfer, import, use, and/or 19 | otherwise deal in the licensed material without restriction, for any and all 20 | patents held by such licensor and necessarily infringed by the form of the work 21 | upon distribution of that licensor's contribution to the work under the terms 22 | of this license. 23 | 24 | NO WARRANTY OF ANY KIND IS IMPLIED BY, OR SHOULD BE INFERRED FROM, THIS LICENSE 25 | OR THE ACT OF DISTRIBUTION UNDER THE TERMS OF THIS LICENSE, INCLUDING BUT NOT 26 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, 27 | AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS, ASSEMBLERS, OR HOLDERS OF 28 | COPYRIGHT OR OTHER LEGAL PRIVILEGE BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER 29 | LIABILITY, WHETHER IN ACTION OF CONTRACT, TORT, OR OTHERWISE ARISING FROM, OUT 30 | OF, OR IN CONNECTION WITH THE WORK OR THE USE OF OR OTHER DEALINGS IN THE WORK. 31 | 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Use this library to safely create valid, correctly encoded URL strings with a fluent API. 2 | 3 | # Usage 4 | 5 | Artifacts are released in Maven Central. For gradle, use the `mavenCentral()` repository. 6 | 7 | Add this to the `dependencies` block in your `build.gradle.kts`: 8 | 9 | ```groovy 10 | implementation("com.palominolabs.http", "url-builder", "VERSION") 11 | ``` 12 | 13 | where `VERSION` is the latest released version. 
If you're using Maven, know that your life could be greatly improved by switching to Gradle; in the meantime, use this dependency block: 14 | 15 | ```xml 16 | <dependency> 17 | <groupId>com.palominolabs.http</groupId> 18 | <artifactId>url-builder</artifactId> 19 | <version>VERSION</version> 20 | </dependency> 21 | ``` 22 | 23 | # Example 24 | 25 | ```java 26 | // showcase the different encoding rules used on different URL components 27 | UrlBuilder.forHost("http", "foo.com") 28 | .pathSegment("with spaces") 29 | .pathSegments("path", "with", "varArgs") 30 | .pathSegment("&=?/") 31 | .queryParam("fancy + name", "fancy?=value") 32 | .matrixParam("matrix", "param?") 33 | .fragment("#?=") 34 | .toUrlString() 35 | 36 | // produces: 37 | // http://foo.com/with%20spaces/path/with/varArgs/&=%3F%2F;matrix=param%3F?fancy%20%2B%20name=fancy?%3Dvalue#%23?= 38 | ``` 39 | 40 | # Motivation 41 | 42 | See [this blog post](http://blog.palominolabs.com/2013/10/03/creating-urls-correctly-and-safely/) for a thorough explanation. 43 | 44 | Ideally, the Java SDK would provide a good way to build properly encoded URLs. Unfortunately, it does not. 45 | 46 | [`URLEncoder`](http://docs.oracle.com/javase/7/docs/api/java/net/URLEncoder.html) seems like a thing that you want to use, but amazingly enough it actually does HTML form encoding, not URL encoding. 47 | 48 | URL encoding is also not something that can be done once you've formed a complete URL string. If your URL is already correctly encoded, you do not need to do anything. If it is not, it is impossible to parse it into its constituent parts for subsequent encoding. You must construct a URL piece by piece, correctly encoding each piece as you go, to end up with a valid URL string. The encoding rules are also different for different parts of the URL (path, query param, etc.). 49 | 50 | Since the URLs that we use in practice for HTTP have somewhat different rules than "generic" URLs, UrlBuilder errs on the side of usefulness for HTTP-specific URLs. Notably, this means that '+' is percent-encoded to avoid being interpreted as a space. 
// Gradle (Kotlin DSL) build for the url-builder library: compiles the Java sources with a Java 8 toolchain,
// runs JUnit 5 tests (some written in Kotlin), runs JMH benchmarks, and publishes signed artifacts via Sonatype.
import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
import java.net.URI
import java.time.Duration

plugins {
    `java-library`
    kotlin("jvm") version "1.7.20"
    id("maven-publish")
    signing
    id("io.github.gradle-nexus.publish-plugin") version "1.1.0"
    id("com.github.ben-manes.versions") version "0.44.0"
    id("me.champeau.gradle.jmh") version "0.5.3"
    id("net.researchgate.release") version "3.0.2"
    id("org.jmailen.kotlinter") version "3.12.0"
}

java {
    withSourcesJar()
    withJavadocJar()
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(8))
        vendor.set(JvmVendorSpec.AZUL)
    }
}

repositories {
    mavenCentral()
}

// Shared dependency versions, looked up by key in the blocks below.
val deps by extra {
    mapOf(
        "slf4j" to "2.0.5",
        "jmh" to "1.22",
        "junit" to "5.9.1"
    )
}

dependencies {
    api("com.google.code.findbugs", "jsr305", "3.0.2")

    testRuntimeOnly("org.slf4j", "slf4j-simple", "${deps["slf4j"]}")
    testRuntimeOnly("org.slf4j", "log4j-over-slf4j", "${deps["slf4j"]}")
    testRuntimeOnly("org.slf4j", "jcl-over-slf4j", "${deps["slf4j"]}")
    testImplementation("org.slf4j", "jul-to-slf4j", "${deps["slf4j"]}")

    testImplementation("org.junit.jupiter", "junit-jupiter-api", "${deps["junit"]}")
    testRuntimeOnly("org.junit.jupiter", "junit-jupiter-engine", "${deps["junit"]}")

    testImplementation(kotlin("stdlib-jdk8"))
    testImplementation(kotlin("test-junit5"))


    jmhImplementation("com.google.guava", "guava", "31.1-jre")
}

group = "com.palominolabs.http"

tasks {
    test {
        useJUnitPlatform()
    }

    // The reified type argument is required: without it the Kotlin DSL cannot resolve which tasks to configure.
    withType<KotlinCompile> {
        kotlinOptions.jvmTarget = "1.8"
    }
}

publishing {
    publications {
        // The publication type must be given explicitly so that from(...) and pom { } resolve.
        register<MavenPublication>("sonatype") {
            from(components["java"])

            // sonatype required pom elements
            pom {
                name.set("${project.group}:${project.name}")
                description.set(name)
                url.set("https://github.com/palominolabs/url-builder")
                licenses {
                    license {
                        name.set("Copyfree Open Innovation License 0.4")
                        url.set("https://copyfree.org/content/standard/licenses/coil/license.txt")
                    }
                }
                developers {
                    developer {
                        id.set("marshallpierce")
                        name.set("Marshall Pierce")
                        email.set("575695+marshallpierce@users.noreply.github.com")
                    }
                }
                scm {
                    connection.set("scm:git:https://github.com/palominolabs/url-builder")
                    developerConnection.set("scm:git:ssh://git@github.com:palominolabs/url-builder.git")
                    url.set("https://github.com/palominolabs/url-builder")
                }
            }
        }
    }

    // A safe throw-away place to publish to:
    // ./gradlew publishSonatypePublicationToLocalDebugRepository -Pversion=foo
    repositories {
        maven {
            name = "localDebug"
            url = URI.create("file:///${project.buildDir}/repos/localDebug")
        }
    }
}

jmh {
    jmhVersion = deps["jmh"]
}

// Publishing is chained onto the release plugin's afterReleaseBuild hook.
tasks.afterReleaseBuild {
    dependsOn(provider { project.tasks.named("publishToSonatype") })
}

// don't barf for devs without signing set up
if (project.hasProperty("signing.keyId")) {
    signing {
        sign(project.extensions.getByType<PublishingExtension>().publications["sonatype"])
    }
}

nexusPublishing {
    repositories {
        sonatype {
            // sonatypeUsername and sonatypePassword properties are used automatically
            stagingProfileId.set("26c8b7fff47581") // com.palominolabs
        }
    }
    // these are not strictly required. The default timeouts are set to 1 minute. But Sonatype can be really slow.
    // If you get the error "java.net.SocketTimeoutException: timeout", these lines will help.
    connectTimeout.set(Duration.ofMinutes(3))
    clientTimeout.set(Duration.ofMinutes(3))
}

release {
    git {
        requireBranch.set("master")
    }
}
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | 19 | ############################################################################## 20 | # 21 | # Gradle start up script for POSIX generated by Gradle. 22 | # 23 | # Important for running: 24 | # 25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is 26 | # noncompliant, but you have some other compliant shell such as ksh or 27 | # bash, then to run this script, type that shell name before the whole 28 | # command line, like: 29 | # 30 | # ksh Gradle 31 | # 32 | # Busybox and similar reduced shells will NOT work, because this script 33 | # requires all of these POSIX shell features: 34 | # * functions; 35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}», 36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»; 37 | # * compound commands having a testable exit status, especially «case»; 38 | # * various built-in commands including «command», «set», and «ulimit». 39 | # 40 | # Important for patching: 41 | # 42 | # (2) This script targets any POSIX shell, so it avoids extensions provided 43 | # by Bash, Ksh, etc; in particular arrays are avoided. 44 | # 45 | # The "traditional" practice of packing multiple parameters into a 46 | # space-separated string is a well documented source of bugs and security 47 | # problems, so this is (mostly) avoided, by progressively accumulating 48 | # options in "$@", and eventually passing that to Java. 49 | # 50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, 51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; 52 | # see the in-line comments for details. 53 | # 54 | # There are tweaks for specific operating systems such as AIX, CygWin, 55 | # Darwin, MinGW, and NonStop. 
56 | # 57 | # (3) This script is generated from the Groovy template 58 | # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt 59 | # within the Gradle project. 60 | # 61 | # You can find Gradle at https://github.com/gradle/gradle/. 62 | # 63 | ############################################################################## 64 | 65 | # Attempt to set APP_HOME 66 | 67 | # Resolve links: $0 may be a link 68 | app_path=$0 69 | 70 | # Need this for daisy-chained symlinks. 71 | while 72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path 73 | [ -h "$app_path" ] 74 | do 75 | ls=$( ls -ld "$app_path" ) 76 | link=${ls#*' -> '} 77 | case $link in #( 78 | /*) app_path=$link ;; #( 79 | *) app_path=$APP_HOME$link ;; 80 | esac 81 | done 82 | 83 | # This is normally unused 84 | # shellcheck disable=SC2034 85 | APP_BASE_NAME=${0##*/} 86 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit 87 | 88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' 90 | 91 | # Use the maximum available, or set MAX_FD != -1 to use that value. 92 | MAX_FD=maximum 93 | 94 | warn () { 95 | echo "$*" 96 | } >&2 97 | 98 | die () { 99 | echo 100 | echo "$*" 101 | echo 102 | exit 1 103 | } >&2 104 | 105 | # OS specific support (must be 'true' or 'false'). 106 | cygwin=false 107 | msys=false 108 | darwin=false 109 | nonstop=false 110 | case "$( uname )" in #( 111 | CYGWIN* ) cygwin=true ;; #( 112 | Darwin* ) darwin=true ;; #( 113 | MSYS* | MINGW* ) msys=true ;; #( 114 | NONSTOP* ) nonstop=true ;; 115 | esac 116 | 117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 118 | 119 | 120 | # Determine the Java command to use to start the JVM. 
121 | if [ -n "$JAVA_HOME" ] ; then 122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 123 | # IBM's JDK on AIX uses strange locations for the executables 124 | JAVACMD=$JAVA_HOME/jre/sh/java 125 | else 126 | JAVACMD=$JAVA_HOME/bin/java 127 | fi 128 | if [ ! -x "$JAVACMD" ] ; then 129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 130 | 131 | Please set the JAVA_HOME variable in your environment to match the 132 | location of your Java installation." 133 | fi 134 | else 135 | JAVACMD=java 136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 137 | 138 | Please set the JAVA_HOME variable in your environment to match the 139 | location of your Java installation." 140 | fi 141 | 142 | # Increase the maximum file descriptors if we can. 143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then 144 | case $MAX_FD in #( 145 | max*) 146 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 147 | # shellcheck disable=SC3045 148 | MAX_FD=$( ulimit -H -n ) || 149 | warn "Could not query maximum file descriptor limit" 150 | esac 151 | case $MAX_FD in #( 152 | '' | soft) :;; #( 153 | *) 154 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. 155 | # shellcheck disable=SC3045 156 | ulimit -n "$MAX_FD" || 157 | warn "Could not set maximum file descriptor limit to $MAX_FD" 158 | esac 159 | fi 160 | 161 | # Collect all arguments for the java command, stacking in reverse order: 162 | # * args from the command line 163 | # * the main class name 164 | # * -classpath 165 | # * -D...appname settings 166 | # * --module-path (only if needed) 167 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. 
168 | 169 | # For Cygwin or MSYS, switch paths to Windows format before running java 170 | if "$cygwin" || "$msys" ; then 171 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) 172 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) 173 | 174 | JAVACMD=$( cygpath --unix "$JAVACMD" ) 175 | 176 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 177 | for arg do 178 | if 179 | case $arg in #( 180 | -*) false ;; # don't mess with options #( 181 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath 182 | [ -e "$t" ] ;; #( 183 | *) false ;; 184 | esac 185 | then 186 | arg=$( cygpath --path --ignore --mixed "$arg" ) 187 | fi 188 | # Roll the args list around exactly as many times as the number of 189 | # args, so each arg winds up back in the position where it started, but 190 | # possibly modified. 191 | # 192 | # NB: a `for` loop captures its iteration list before it begins, so 193 | # changing the positional parameters here affects neither the number of 194 | # iterations, nor the values presented in `arg`. 195 | shift # remove old arg 196 | set -- "$@" "$arg" # push replacement arg 197 | done 198 | fi 199 | 200 | # Collect all arguments for the java command; 201 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of 202 | # shell script including quotes and variable substitutions, so put them in 203 | # double quotes to make sure that they get re-expanded; and 204 | # * put everything else in single quotes, so that it's not re-expanded. 205 | 206 | set -- \ 207 | "-Dorg.gradle.appname=$APP_BASE_NAME" \ 208 | -classpath "$CLASSPATH" \ 209 | org.gradle.wrapper.GradleWrapperMain \ 210 | "$@" 211 | 212 | # Stop when "xargs" is not available. 213 | if ! command -v xargs >/dev/null 2>&1 214 | then 215 | die "xargs is not available" 216 | fi 217 | 218 | # Use "xargs" to parse quoted args. 219 | # 220 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed. 
221 | # 222 | # In Bash we could simply go: 223 | # 224 | # readarray ARGS < <( xargs -n1 <<<"$var" ) && 225 | # set -- "${ARGS[@]}" "$@" 226 | # 227 | # but POSIX shell has neither arrays nor command substitution, so instead we 228 | # post-process each arg (as a line of input to sed) to backslash-escape any 229 | # character that might be a shell metacharacter, then use eval to reverse 230 | # that process (while maintaining the separation between arguments), and wrap 231 | # the whole thing up as a single "set" statement. 232 | # 233 | # This will of course break if any of these variables contains a newline or 234 | # an unmatched quote. 235 | # 236 | 237 | eval "set -- $( 238 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | 239 | xargs -n1 | 240 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | 241 | tr '\n' ' ' 242 | )" '"$@"' 243 | 244 | exec "$JAVACMD" "$@" 245 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 
15 | @rem 16 | 17 | @if "%DEBUG%"=="" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%"=="" set DIRNAME=. 29 | @rem This is normally unused 30 | set APP_BASE_NAME=%~n0 31 | set APP_HOME=%DIRNAME% 32 | 33 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 34 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 35 | 36 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 37 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 38 | 39 | @rem Find java.exe 40 | if defined JAVA_HOME goto findJavaFromJavaHome 41 | 42 | set JAVA_EXE=java.exe 43 | %JAVA_EXE% -version >NUL 2>&1 44 | if %ERRORLEVEL% equ 0 goto execute 45 | 46 | echo. 47 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 48 | echo. 49 | echo Please set the JAVA_HOME variable in your environment to match the 50 | echo location of your Java installation. 51 | 52 | goto fail 53 | 54 | :findJavaFromJavaHome 55 | set JAVA_HOME=%JAVA_HOME:"=% 56 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 57 | 58 | if exist "%JAVA_EXE%" goto execute 59 | 60 | echo. 61 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 62 | echo. 63 | echo Please set the JAVA_HOME variable in your environment to match the 64 | echo location of your Java installation. 
65 | 66 | goto fail 67 | 68 | :execute 69 | @rem Setup the command line 70 | 71 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 72 | 73 | 74 | @rem Execute Gradle 75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 76 | 77 | :end 78 | @rem End local scope for the variables with windows NT shell 79 | if %ERRORLEVEL% equ 0 goto mainEnd 80 | 81 | :fail 82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 83 | rem the _cmd.exe /c_ return code! 84 | set EXIT_CODE=%ERRORLEVEL% 85 | if %EXIT_CODE% equ 0 set EXIT_CODE=1 86 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% 87 | exit /b %EXIT_CODE% 88 | 89 | :mainEnd 90 | if "%OS%"=="Windows_NT" endlocal 91 | 92 | :omega 93 | -------------------------------------------------------------------------------- /settings.gradle.kts: -------------------------------------------------------------------------------- 1 | rootProject.name = "url-builder" 2 | -------------------------------------------------------------------------------- /src/jmh/java/com/palominolabs/http/url/PercentDecoderBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.palominolabs.http.url; 2 | 3 | import java.nio.charset.CharacterCodingException; 4 | import java.nio.charset.StandardCharsets; 5 | import org.openjdk.jmh.annotations.Benchmark; 6 | import org.openjdk.jmh.annotations.Scope; 7 | import org.openjdk.jmh.annotations.State; 8 | 9 | import static com.palominolabs.http.url.PercentEncoderBenchmark.LARGE_STRING_MIX; 10 | import static com.palominolabs.http.url.PercentEncoderBenchmark.SMALL_STRING_MIX; 11 | 12 | public class PercentDecoderBenchmark { 13 | 14 | static final String SMALL_STRING_ENCODED; 15 | static final String LARGE_STRING_ENCODED; 16 | 17 | static { 18 | PercentEncoder encoder = 
UrlPercentEncoders.getUnstructuredQueryEncoder(); 19 | try { 20 | SMALL_STRING_ENCODED = encoder.encode(SMALL_STRING_MIX); 21 | } catch (CharacterCodingException e) { 22 | throw new RuntimeException(e); 23 | } 24 | try { 25 | LARGE_STRING_ENCODED = encoder.encode(LARGE_STRING_MIX); 26 | } catch (CharacterCodingException e) { 27 | throw new RuntimeException(e); 28 | } 29 | } 30 | 31 | @State(Scope.Thread) 32 | public static class ThreadState { 33 | PercentDecoder decoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder()); 34 | } 35 | 36 | @Benchmark 37 | public String testPercentDecodeSmall(ThreadState state) throws CharacterCodingException { 38 | return state.decoder.decode(SMALL_STRING_ENCODED); 39 | } 40 | 41 | @Benchmark 42 | public String testPercentDecodeLarge(ThreadState state) throws CharacterCodingException { 43 | return state.decoder.decode(LARGE_STRING_ENCODED); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/jmh/java/com/palominolabs/http/url/PercentEncoderBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.palominolabs.http.url; 2 | 3 | import com.google.common.base.Strings; 4 | import org.openjdk.jmh.annotations.Benchmark; 5 | import org.openjdk.jmh.annotations.Scope; 6 | import org.openjdk.jmh.annotations.State; 7 | 8 | import java.nio.charset.CharacterCodingException; 9 | 10 | public class PercentEncoderBenchmark { 11 | 12 | // safe and unsafe 13 | static final String TINY_STRING_MIX = "foo bar baz"; 14 | static final String SMALL_STRING_MIX = "small value !@#$%^&*()???????????????!@#$%^&*()"; 15 | // no characters escaped 16 | static final String SMALL_STRING_ALL_SAFE = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; 17 | // all characters escaped 18 | static final String SMALL_STRING_ALL_UNSAFE = "???????????????????????????????????????????????"; 19 | 20 | static final String LARGE_STRING_MIX; 21 | static final String 
LARGE_STRING_ALL_SAFE; 22 | static final String LARGE_STRING_ALL_UNSAFE; 23 | 24 | static { 25 | LARGE_STRING_MIX = Strings.repeat(SMALL_STRING_MIX, 1000); 26 | LARGE_STRING_ALL_SAFE = Strings.repeat(SMALL_STRING_ALL_SAFE, 1000); 27 | LARGE_STRING_ALL_UNSAFE = Strings.repeat(SMALL_STRING_ALL_UNSAFE, 1000); 28 | } 29 | 30 | @State(Scope.Thread) 31 | public static class ThreadState { 32 | PercentEncoder encoder = UrlPercentEncoders.getUnstructuredQueryEncoder(); 33 | PercentEncoderOutputHandler noOpHandler = new NoOpOutputHandler(); 34 | AccumXorOutputHandler accumXorHandler = new AccumXorOutputHandler(); 35 | } 36 | 37 | @Benchmark 38 | public String testPercentEncodeTinyMix(ThreadState state) throws CharacterCodingException { 39 | return state.encoder.encode(TINY_STRING_MIX); 40 | } 41 | 42 | @Benchmark 43 | public String testPercentEncodeSmallMix(ThreadState state) throws CharacterCodingException { 44 | return state.encoder.encode(SMALL_STRING_MIX); 45 | } 46 | 47 | @Benchmark 48 | public String testPercentEncodeLargeMix(ThreadState state) throws CharacterCodingException { 49 | return state.encoder.encode(LARGE_STRING_MIX); 50 | } 51 | 52 | @Benchmark 53 | public String testPercentEncodeSmallSafe(ThreadState state) throws CharacterCodingException { 54 | return state.encoder.encode(SMALL_STRING_ALL_SAFE); 55 | } 56 | 57 | @Benchmark 58 | public String testPercentEncodeLargeSafe(ThreadState state) throws CharacterCodingException { 59 | return state.encoder.encode(LARGE_STRING_ALL_SAFE); 60 | } 61 | 62 | @Benchmark 63 | public String testPercentEncodeSmallUnsafe(ThreadState state) throws CharacterCodingException { 64 | return state.encoder.encode(SMALL_STRING_ALL_UNSAFE); 65 | } 66 | 67 | @Benchmark 68 | public String testPercentEncodeLargeUnsafe(ThreadState state) throws CharacterCodingException { 69 | return state.encoder.encode(LARGE_STRING_ALL_UNSAFE); 70 | } 71 | 72 | @Benchmark 73 | public void testPercentEncodeSmallNoOpMix(ThreadState state) throws 
CharacterCodingException { 74 | state.encoder.encode(SMALL_STRING_MIX, state.noOpHandler); 75 | } 76 | 77 | @Benchmark 78 | public void testPercentEncodeLargeNoOpMix(ThreadState state) throws CharacterCodingException { 79 | state.encoder.encode(LARGE_STRING_MIX, state.noOpHandler); 80 | } 81 | 82 | @Benchmark 83 | public char testPercentEncodeSmallAccumXorMix(ThreadState state) throws CharacterCodingException { 84 | state.encoder.encode(SMALL_STRING_MIX, state.accumXorHandler); 85 | return state.accumXorHandler.c; 86 | } 87 | 88 | @Benchmark 89 | public char testPercentEncodeLargeAccumXorMix(ThreadState state) throws CharacterCodingException { 90 | state.encoder.encode(LARGE_STRING_MIX, state.accumXorHandler); 91 | return state.accumXorHandler.c; 92 | } 93 | 94 | static class NoOpOutputHandler implements PercentEncoderOutputHandler { 95 | 96 | @Override 97 | public void onOutputChar(char c) { 98 | // no op 99 | } 100 | } 101 | 102 | /** 103 | * A handler that doesn't allocate, but can't be optimized away 104 | */ 105 | static class AccumXorOutputHandler implements PercentEncoderOutputHandler { 106 | char c; 107 | 108 | @Override 109 | public void onOutputChar(char c) { 110 | this.c ^= c; 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/jmh/java/com/palominolabs/http/url/URLDecoderBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.palominolabs.http.url; 2 | 3 | import org.openjdk.jmh.annotations.Benchmark; 4 | 5 | import java.io.UnsupportedEncodingException; 6 | import java.net.URLDecoder; 7 | import java.nio.charset.CharacterCodingException; 8 | 9 | import static com.palominolabs.http.url.PercentDecoderBenchmark.LARGE_STRING_ENCODED; 10 | import static com.palominolabs.http.url.PercentDecoderBenchmark.SMALL_STRING_ENCODED; 11 | 12 | public class URLDecoderBenchmark { 13 | 14 | @Benchmark 15 | public String testUrlDecodeSmall() throws 
CharacterCodingException, UnsupportedEncodingException { 16 | return URLDecoder.decode(SMALL_STRING_ENCODED, "UTF-8"); 17 | } 18 | 19 | @Benchmark 20 | public String testUrlDecodeLarge() throws CharacterCodingException, UnsupportedEncodingException { 21 | return URLDecoder.decode(LARGE_STRING_ENCODED, "UTF-8"); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/jmh/java/com/palominolabs/http/url/URLEncoderBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.palominolabs.http.url; 2 | 3 | import org.openjdk.jmh.annotations.Benchmark; 4 | 5 | import java.io.UnsupportedEncodingException; 6 | import java.net.URLEncoder; 7 | import java.nio.charset.CharacterCodingException; 8 | 9 | import static com.palominolabs.http.url.PercentEncoderBenchmark.LARGE_STRING_MIX; 10 | import static com.palominolabs.http.url.PercentEncoderBenchmark.SMALL_STRING_MIX; 11 | 12 | public class URLEncoderBenchmark { 13 | 14 | @Benchmark 15 | public String testUrlEncodeSmall() throws CharacterCodingException, UnsupportedEncodingException { 16 | return URLEncoder.encode(SMALL_STRING_MIX, "UTF-8"); 17 | } 18 | 19 | @Benchmark 20 | public String testUrlEncodeLarge() throws CharacterCodingException, UnsupportedEncodingException { 21 | return URLEncoder.encode(LARGE_STRING_MIX, "UTF-8"); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/palominolabs/http/url/PercentDecoder.java: -------------------------------------------------------------------------------- 1 | package com.palominolabs.http.url; 2 | 3 | import javax.annotation.Nonnull; 4 | import javax.annotation.concurrent.NotThreadSafe; 5 | import java.nio.ByteBuffer; 6 | import java.nio.CharBuffer; 7 | import java.nio.charset.CharsetDecoder; 8 | import java.nio.charset.CoderResult; 9 | import java.nio.charset.MalformedInputException; 10 | import 
java.nio.charset.UnmappableCharacterException; 11 | 12 | import static java.nio.charset.CoderResult.OVERFLOW; 13 | import static java.nio.charset.CoderResult.UNDERFLOW; 14 | 15 | /** 16 | * Decodes percent-encoded (%XX) Unicode text. 17 | */ 18 | @NotThreadSafe 19 | public final class PercentDecoder { 20 | 21 | /** 22 | * bytes represented by the current sequence of %-triples. Resized as needed. 23 | */ 24 | private ByteBuffer encodedBuf; 25 | 26 | /** 27 | * Written to with decoded chars by decoder 28 | */ 29 | private final CharBuffer decodedCharBuf; 30 | private final CharsetDecoder decoder; 31 | 32 | /** 33 | * The decoded string for the current input 34 | */ 35 | private final StringBuilder outputBuf = new StringBuilder(); 36 | 37 | /** 38 | * Construct a new PercentDecoder with default buffer sizes. 39 | * 40 | * @param charsetDecoder Charset to decode bytes into chars with 41 | * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int) 42 | */ 43 | public PercentDecoder(@Nonnull CharsetDecoder charsetDecoder) { 44 | this(charsetDecoder, 16, 16); 45 | } 46 | 47 | /** 48 | * @param charsetDecoder Charset to decode bytes into chars with 49 | * @param initialEncodedByteBufSize Initial size of buffer that holds encoded bytes 50 | * @param decodedCharBufSize Size of buffer that encoded bytes are decoded into 51 | */ 52 | public PercentDecoder(@Nonnull CharsetDecoder charsetDecoder, int initialEncodedByteBufSize, 53 | int decodedCharBufSize) { 54 | encodedBuf = ByteBuffer.allocate(initialEncodedByteBufSize); 55 | decodedCharBuf = CharBuffer.allocate(decodedCharBufSize); 56 | decoder = charsetDecoder; 57 | } 58 | 59 | /** 60 | * @param input Input with %-encoded representation of characters in this instance's configured character set, e.g. 
61 | * "%20" for a space character 62 | * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters 63 | * @throws MalformedInputException if decoder is configured to report errors and malformed input is detected 64 | * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is 65 | * detected 66 | */ 67 | @Nonnull 68 | public String decode(@Nonnull CharSequence input) throws MalformedInputException, UnmappableCharacterException { 69 | outputBuf.setLength(0); 70 | // this is almost always an underestimate of the size needed: 71 | // only a 4-byte encoding (which is 12 characters input) would case this to be an overestimate 72 | outputBuf.ensureCapacity(input.length() / 8); 73 | encodedBuf.clear(); 74 | 75 | for (int i = 0; i < input.length(); i++) { 76 | char c = input.charAt(i); 77 | if (c != '%') { 78 | handleEncodedBytes(); 79 | 80 | outputBuf.append(c); 81 | continue; 82 | } 83 | 84 | if (i + 2 >= input.length()) { 85 | throw new IllegalArgumentException( 86 | "Could not percent decode <" + input + ">: incomplete %-pair at position " + i); 87 | } 88 | 89 | // grow the byte buf if needed 90 | if (encodedBuf.remaining() == 0) { 91 | ByteBuffer largerBuf = ByteBuffer.allocate(encodedBuf.capacity() * 2); 92 | encodedBuf.flip(); 93 | largerBuf.put(encodedBuf); 94 | encodedBuf = largerBuf; 95 | } 96 | 97 | // note that we advance i here as we consume chars 98 | int msBits = Character.digit(input.charAt(++i), 16); 99 | int lsBits = Character.digit(input.charAt(++i), 16); 100 | 101 | if (msBits == -1 || lsBits == -1) { 102 | throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">"); 103 | } 104 | 105 | msBits <<= 4; 106 | msBits |= lsBits; 107 | 108 | // msBits can only have 8 bits set, so cast is safe 109 | encodedBuf.put((byte) msBits); 110 | } 111 | 112 | handleEncodedBytes(); 113 | 114 | return outputBuf.toString(); 115 | } 116 | 
117 | /** 118 | * Decode any buffered encoded bytes and write them to the output buf. 119 | */ 120 | private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException { 121 | if (encodedBuf.position() == 0) { 122 | // nothing to do 123 | return; 124 | } 125 | 126 | decoder.reset(); 127 | CoderResult coderResult; 128 | 129 | // switch to reading mode 130 | encodedBuf.flip(); 131 | 132 | // loop while we're filling up the decoded char buf, or there's any encoded bytes 133 | // decode() in practice seems to only consume bytes when it can decode an entire char... 134 | do { 135 | decodedCharBuf.clear(); 136 | coderResult = decoder.decode(encodedBuf, decodedCharBuf, false); 137 | throwIfError(coderResult); 138 | appendDecodedChars(); 139 | } while (coderResult == OVERFLOW && encodedBuf.hasRemaining()); 140 | 141 | // final decode with end-of-input flag 142 | decodedCharBuf.clear(); 143 | coderResult = decoder.decode(encodedBuf, decodedCharBuf, true); 144 | throwIfError(coderResult); 145 | 146 | if (encodedBuf.hasRemaining()) { 147 | throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes"); 148 | } 149 | if (coderResult != UNDERFLOW) { 150 | throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult); 151 | } 152 | 153 | appendDecodedChars(); 154 | 155 | // we've finished the input, wrap it up 156 | encodedBuf.clear(); 157 | flush(); 158 | } 159 | 160 | /** 161 | * Must only be called when the input encoded bytes buffer is empty 162 | */ 163 | private void flush() throws MalformedInputException, UnmappableCharacterException { 164 | CoderResult coderResult; 165 | decodedCharBuf.clear(); 166 | 167 | coderResult = decoder.flush(decodedCharBuf); 168 | appendDecodedChars(); 169 | 170 | throwIfError(coderResult); 171 | 172 | if (coderResult != UNDERFLOW) { 173 | throw new IllegalStateException("Decoder flush resulted in " + coderResult); 174 | } 175 | } 176 | 
177 | /** 178 | * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding 179 | * CharacterCodingException. 180 | * 181 | * @param coderResult result to check 182 | * @throws MalformedInputException if result represents malformed input 183 | * @throws UnmappableCharacterException if result represents an unmappable character 184 | */ 185 | private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException { 186 | if (coderResult.isMalformed()) { 187 | throw new MalformedInputException(coderResult.length()); 188 | } 189 | if (coderResult.isUnmappable()) { 190 | throw new UnmappableCharacterException(coderResult.length()); 191 | } } 192 | 193 | /** 194 | * Flip the decoded char buf and append it to the string bug 195 | */ 196 | private void appendDecodedChars() { 197 | decodedCharBuf.flip(); 198 | outputBuf.append(decodedCharBuf); 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/main/java/com/palominolabs/http/url/PercentEncoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Palomino Labs, Inc. 3 | */ 4 | 5 | package com.palominolabs.http.url; 6 | 7 | import javax.annotation.Nonnull; 8 | import javax.annotation.concurrent.NotThreadSafe; 9 | import java.nio.ByteBuffer; 10 | import java.nio.CharBuffer; 11 | import java.nio.charset.CharsetEncoder; 12 | import java.nio.charset.CoderResult; 13 | import java.nio.charset.MalformedInputException; 14 | import java.nio.charset.UnmappableCharacterException; 15 | import java.util.BitSet; 16 | 17 | import static java.lang.Character.isHighSurrogate; 18 | import static java.lang.Character.isLowSurrogate; 19 | 20 | /** 21 | * Encodes unsafe characters as a sequence of %XX hex-encoded bytes. 22 | * 23 | * This is typically done when encoding components of URLs. 
See {@link UrlPercentEncoders} for pre-configured 24 | * PercentEncoder instances. 25 | */ 26 | @NotThreadSafe 27 | public final class PercentEncoder { 28 | 29 | private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray(); 30 | 31 | private final BitSet safeChars; 32 | private final CharsetEncoder encoder; 33 | /** 34 | * Pre-allocate a string handler to make the common case of encoding to a string faster 35 | */ 36 | private final StringBuilderPercentEncoderOutputHandler stringHandler = new StringBuilderPercentEncoderOutputHandler(); 37 | private final ByteBuffer encodedBytes; 38 | private final CharBuffer unsafeCharsToEncode; 39 | 40 | /** 41 | * @param safeChars the set of chars to NOT encode, stored as a bitset with the int positions corresponding to 42 | * those chars set to true. Treated as read only. 43 | * @param charsetEncoder charset encoder to encode characters with. Make sure to not re-use CharsetEncoder instances 44 | * across threads. 45 | */ 46 | public PercentEncoder(@Nonnull BitSet safeChars, @Nonnull CharsetEncoder charsetEncoder) { 47 | this.safeChars = safeChars; 48 | this.encoder = charsetEncoder; 49 | 50 | // why is this a float? sigh. 51 | int maxBytesPerChar = 1 + (int) encoder.maxBytesPerChar(); 52 | // need to handle surrogate pairs, so need to be able to handle 2 chars worth of stuff at once 53 | encodedBytes = ByteBuffer.allocate(maxBytesPerChar * 2); 54 | unsafeCharsToEncode = CharBuffer.allocate(2); 55 | } 56 | 57 | /** 58 | * Encode the input and pass output chars to a handler. 
59 | * 60 | * @param input input string 61 | * @param handler handler to call on each output character 62 | * @throws MalformedInputException if encoder is configured to report errors and malformed input is detected 63 | * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is 64 | * detected 65 | */ 66 | public void encode(@Nonnull CharSequence input, @Nonnull PercentEncoderOutputHandler handler) throws 67 | MalformedInputException, UnmappableCharacterException { 68 | 69 | for (int i = 0; i < input.length(); i++) { 70 | 71 | char c = input.charAt(i); 72 | 73 | if (safeChars.get(c)) { 74 | handler.onOutputChar(c); 75 | continue; 76 | } 77 | 78 | // not a safe char 79 | unsafeCharsToEncode.clear(); 80 | unsafeCharsToEncode.append(c); 81 | if (isHighSurrogate(c)) { 82 | if (input.length() > i + 1) { 83 | // get the low surrogate as well 84 | char lowSurrogate = input.charAt(i + 1); 85 | if (isLowSurrogate(lowSurrogate)) { 86 | unsafeCharsToEncode.append(lowSurrogate); 87 | i++; 88 | } else { 89 | throw new IllegalArgumentException( 90 | "Invalid UTF-16: Char " + (i) + " is a high surrogate (\\u" + Integer 91 | .toHexString(c) + "), but char " + (i + 1) + " is not a low surrogate (\\u" + Integer 92 | .toHexString(lowSurrogate) + ")"); 93 | } 94 | } else { 95 | throw new IllegalArgumentException( 96 | "Invalid UTF-16: The last character in the input string was a high surrogate (\\u" + Integer 97 | .toHexString(c) + ")"); 98 | } 99 | } 100 | 101 | flushUnsafeCharBuffer(handler); 102 | } 103 | } 104 | 105 | /** 106 | * Encode the input and return the resulting text as a String. 
107 | * 108 | * @param input input string 109 | * @return the input string with every character that's not in safeChars turned into its byte representation via the 110 | * instance's encoder and then percent-encoded 111 | * @throws MalformedInputException if encoder is configured to report errors and malformed input is detected 112 | * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is 113 | * detected 114 | */ 115 | @Nonnull 116 | public String encode(@Nonnull CharSequence input) throws MalformedInputException, UnmappableCharacterException { 117 | stringHandler.reset(); 118 | stringHandler.ensureCapacity(input.length()); 119 | encode(input, stringHandler); 120 | return stringHandler.getContents(); 121 | } 122 | 123 | /** 124 | * Encode unsafeCharsToEncode to bytes as per charsetEncoder, then percent-encode those bytes into output. 125 | * 126 | * Side effects: unsafeCharsToEncode will be read from and cleared. encodedBytes will be cleared and written to. 
127 | * 128 | * @param handler where the encoded versions of the contents of unsafeCharsToEncode will be written 129 | */ 130 | private void flushUnsafeCharBuffer(PercentEncoderOutputHandler handler) throws MalformedInputException, 131 | UnmappableCharacterException { 132 | // need to read from the char buffer, which was most recently written to 133 | unsafeCharsToEncode.flip(); 134 | 135 | encodedBytes.clear(); 136 | 137 | encoder.reset(); 138 | CoderResult result = encoder.encode(unsafeCharsToEncode, encodedBytes, true); 139 | checkResult(result); 140 | result = encoder.flush(encodedBytes); 141 | checkResult(result); 142 | 143 | // read contents of bytebuffer 144 | encodedBytes.flip(); 145 | 146 | while (encodedBytes.hasRemaining()) { 147 | byte b = encodedBytes.get(); 148 | 149 | handler.onOutputChar('%'); 150 | handler.onOutputChar(HEX_CODE[b >> 4 & 0xF]); 151 | handler.onOutputChar(HEX_CODE[b & 0xF]); 152 | } 153 | } 154 | 155 | /** 156 | * @param result result to check 157 | * @throws IllegalStateException if result is overflow 158 | * @throws MalformedInputException if result represents malformed input 159 | * @throws UnmappableCharacterException if result represents an unmappable character 160 | */ 161 | private static void checkResult(CoderResult result) throws MalformedInputException, UnmappableCharacterException { 162 | if (result.isOverflow()) { 163 | throw new IllegalStateException("Byte buffer overflow; this should not happen."); 164 | } 165 | if (result.isMalformed()) { 166 | throw new MalformedInputException(result.length()); 167 | } 168 | if (result.isUnmappable()) { 169 | throw new UnmappableCharacterException(result.length()); 170 | } 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/main/java/com/palominolabs/http/url/PercentEncoderOutputHandler.java: -------------------------------------------------------------------------------- 1 | package com.palominolabs.http.url; 2 | 3 | import 
javax.annotation.concurrent.NotThreadSafe; 4 | 5 | /** 6 | * A callback used during percent encoding. 7 | */ 8 | @NotThreadSafe 9 | public interface PercentEncoderOutputHandler { 10 | /** 11 | * Called on each character output by a PercentEncoder. 12 | * 13 | * @param c output character that's either in the calling PercentEncoder's safe char set or part of a 14 | * percent-hex-encoded triple, e.g. "%FF". 15 | */ 16 | void onOutputChar(char c); 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/com/palominolabs/http/url/StringBuilderPercentEncoderOutputHandler.java: -------------------------------------------------------------------------------- 1 | package com.palominolabs.http.url; 2 | 3 | import javax.annotation.Nonnull; 4 | import javax.annotation.concurrent.NotThreadSafe; 5 | 6 | /** 7 | * A PercentEncoderHandler implementation that accumulates chars in a buffer. 8 | */ 9 | @NotThreadSafe 10 | public final class StringBuilderPercentEncoderOutputHandler implements PercentEncoderOutputHandler { 11 | 12 | private final StringBuilder stringBuilder; 13 | 14 | /** 15 | * Create a new handler with a default size StringBuilder. 16 | */ 17 | public StringBuilderPercentEncoderOutputHandler() { 18 | stringBuilder = new StringBuilder(); 19 | } 20 | 21 | /** 22 | * @return A string containing the chars accumulated since the last call to reset() 23 | */ 24 | @Nonnull 25 | public String getContents() { 26 | return stringBuilder.toString(); 27 | } 28 | 29 | /** 30 | * Clear the buffer. 31 | */ 32 | public void reset() { 33 | stringBuilder.setLength(0); 34 | } 35 | 36 | /** 37 | * Ensure the internal buffer has enough capacity for the specified length of input. 
38 | * 39 | * @param length length to ensure capacity for 40 | */ 41 | public void ensureCapacity(int length) { 42 | stringBuilder.ensureCapacity(length); 43 | } 44 | 45 | @Override 46 | public void onOutputChar(char c) { 47 | stringBuilder.append(c); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/palominolabs/http/url/UrlBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Palomino Labs, Inc. 3 | */ 4 | 5 | package com.palominolabs.http.url; 6 | 7 | import java.net.URL; 8 | import java.nio.charset.CharacterCodingException; 9 | import java.nio.charset.CharsetDecoder; 10 | import java.util.ArrayList; 11 | import java.util.Iterator; 12 | import java.util.List; 13 | import java.util.regex.Pattern; 14 | import javax.annotation.Nonnull; 15 | import javax.annotation.Nullable; 16 | import javax.annotation.concurrent.NotThreadSafe; 17 | 18 | import static com.palominolabs.http.url.UrlPercentEncoders.getFragmentEncoder; 19 | import static com.palominolabs.http.url.UrlPercentEncoders.getMatrixEncoder; 20 | import static com.palominolabs.http.url.UrlPercentEncoders.getPathEncoder; 21 | import static com.palominolabs.http.url.UrlPercentEncoders.getQueryParamEncoder; 22 | import static com.palominolabs.http.url.UrlPercentEncoders.getRegNameEncoder; 23 | import static com.palominolabs.http.url.UrlPercentEncoders.getUnstructuredQueryEncoder; 24 | import static java.nio.charset.StandardCharsets.UTF_8; 25 | 26 | /** 27 | * Builder for urls with url-encoding applied to path, query param, etc. 28 | * 29 | * Escaping rules are from RFC 3986, RFC 1738 and the HTML 4 spec (http://www.w3.org/TR/html401/interact/forms.html#form-content-type). 30 | * This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make 31 | * HTTP-useful URLs. 
32 | */ 33 | @NotThreadSafe 34 | public final class UrlBuilder { 35 | 36 | /** 37 | * IPv6 address, cribbed from http://stackoverflow.com/questions/46146/what-are-the-java-regular-expressions-for-matching-ipv4-and-ipv6-strings 38 | */ 39 | private static final Pattern IPV6_PATTERN = Pattern 40 | .compile( 41 | "\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z"); 42 | 43 | /** 44 | * IPv4 dotted quad 45 | */ 46 | private static final Pattern IPV4_PATTERN = Pattern 47 | .compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z"); 48 | 49 | @Nonnull 50 | private final String scheme; 51 | 52 | @Nonnull 53 | private final String host; 54 | 55 | @Nullable 56 | private final Integer port; 57 | 58 | private final List> queryParams = new ArrayList<>(); 59 | 60 | /** 61 | * If this is non-null, queryParams must be empty, and vice versa. 62 | */ 63 | @Nullable 64 | private String unstructuredQuery; 65 | 66 | private final List pathSegments = new ArrayList<>(); 67 | 68 | private final PercentEncoder pathEncoder = getPathEncoder(); 69 | private final PercentEncoder regNameEncoder = getRegNameEncoder(); 70 | private final PercentEncoder matrixEncoder = getMatrixEncoder(); 71 | private final PercentEncoder queryParamEncoder = getQueryParamEncoder(); 72 | private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder(); 73 | private final PercentEncoder fragmentEncoder = getFragmentEncoder(); 74 | 75 | @Nullable 76 | private String fragment; 77 | 78 | private boolean forceTrailingSlash = false; 79 | 80 | /** 81 | * Create a URL with UTF-8 encoding. 82 | * 83 | * @param scheme scheme (e.g. http) 84 | * @param host host (e.g. 
foo.com or 1.2.3.4 or [::1]) 85 | * @param port null or a positive integer 86 | */ 87 | private UrlBuilder(@Nonnull String scheme, @Nonnull String host, @Nullable Integer port) { 88 | this.host = host; 89 | this.scheme = scheme; 90 | this.port = port; 91 | } 92 | 93 | /** 94 | * Create a URL with an null port and UTF-8 encoding. 95 | * 96 | * @param scheme scheme (e.g. http) 97 | * @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal 98 | * ([::1]), excluding IPvFuture since no one uses that in practice 99 | * @return a url builder 100 | * @see UrlBuilder#forHost(String scheme, String host, int port) 101 | */ 102 | public static UrlBuilder forHost(@Nonnull String scheme, @Nonnull String host) { 103 | return new UrlBuilder(scheme, host, null); 104 | } 105 | 106 | /** 107 | * @param scheme scheme (e.g. http) 108 | * @param host host in any of the valid syntaxes: reg-name ( a dns name), ipv4 literal (1.2.3.4), ipv6 literal 109 | * ([::1]), excluding IPvFuture since no one uses that in practice 110 | * @param port port 111 | * @return a url builder 112 | */ 113 | public static UrlBuilder forHost(@Nonnull String scheme, @Nonnull String host, int port) { 114 | return new UrlBuilder(scheme, host, port); 115 | } 116 | 117 | /** 118 | * Calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a UTF-8 CharsetDecoder. The same semantics about the 119 | * query string apply. 120 | * 121 | * @param url url to initialize builder with 122 | * @return a UrlBuilder containing the host, path, etc. from the url 123 | * @throws CharacterCodingException if char decoding fails 124 | * @see UrlBuilder#fromUrl(URL, CharsetDecoder) 125 | */ 126 | @Nonnull 127 | public static UrlBuilder fromUrl(@Nonnull URL url) throws CharacterCodingException { 128 | return fromUrl(url, UTF_8.newDecoder()); 129 | } 130 | 131 | /** 132 | * Create a UrlBuilder initialized with the contents of a {@link URL}. 
133 | * 134 | * The query string will be parsed into HTML4 query params if it can be separated into a 135 | * &-separated sequence of key=value pairs. The sequence of query params can then be 136 | * appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is 137 | * only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that 138 | * is in some other form. To represent this case, if the aforementioned param-parsing attempt fails, the query 139 | * string will be treated as just a monolithic, unstructured, string. In this case, calls to {@link 140 | * UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls 141 | * to {@link UrlBuilder#unstructuredQuery(String)}}, which replaces the entire query string, are allowed. 142 | * 143 | * @param url url to initialize builder with 144 | * @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8) 145 | * @return a UrlBuilder containing the host, path, etc. 
from the url 146 | * @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to 147 | * report errors 148 | * @see UrlBuilder#fromUrl(URL, CharsetDecoder) 149 | */ 150 | @Nonnull 151 | public static UrlBuilder fromUrl(@Nonnull URL url, @Nonnull CharsetDecoder charsetDecoder) throws 152 | CharacterCodingException { 153 | 154 | PercentDecoder decoder = new PercentDecoder(charsetDecoder); 155 | // reg names must be encoded UTF-8 156 | PercentDecoder regNameDecoder; 157 | if (charsetDecoder.charset().equals(UTF_8)) { 158 | regNameDecoder = decoder; 159 | } else { 160 | regNameDecoder = new PercentDecoder(UTF_8.newDecoder()); 161 | } 162 | 163 | Integer port = url.getPort(); 164 | if (port == -1) { 165 | port = null; 166 | } 167 | 168 | UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port); 169 | 170 | buildFromPath(builder, decoder, url); 171 | 172 | buildFromQuery(builder, decoder, url); 173 | 174 | if (url.getRef() != null) { 175 | builder.fragment(decoder.decode(url.getRef())); 176 | } 177 | 178 | return builder; 179 | } 180 | 181 | /** 182 | * Add a path segment. 183 | * 184 | * @param segment a path segment 185 | * @return this 186 | */ 187 | @Nonnull 188 | public UrlBuilder pathSegment(@Nonnull String segment) { 189 | pathSegments.add(new PathSegment(segment)); 190 | return this; 191 | } 192 | 193 | /** 194 | * Add multiple path segments. Equivalent to successive calls to {@link UrlBuilder#pathSegment(String)}. 195 | * 196 | * @param segments path segments 197 | * @return this 198 | */ 199 | @Nonnull 200 | public UrlBuilder pathSegments(String... segments) { 201 | for (String segment : segments) { 202 | pathSegment(segment); 203 | } 204 | 205 | return this; 206 | } 207 | 208 | /** 209 | * Add an HTML query parameter. Query parameters will be encoded in the order added. 
210 | * 211 | * Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by 212 | * http://www.w3.org/TR/html401/interact/forms.html#form-content-type. 213 | * 214 | * If you use this method to build a query string, or created this builder from a url with a query string that can 215 | * successfully be parsed into query param pairs, you cannot subsequently use {@link 216 | * UrlBuilder#unstructuredQuery(String)}. See {@link UrlBuilder#fromUrl(URL, CharsetDecoder)}. 217 | * 218 | * @param name param name 219 | * @param value param value 220 | * @return this 221 | */ 222 | @Nonnull 223 | public UrlBuilder queryParam(@Nonnull String name, @Nonnull String value) { 224 | if (unstructuredQuery != null) { 225 | throw new IllegalStateException( 226 | "Cannot call queryParam() when this already has an unstructured query specified"); 227 | } 228 | 229 | queryParams.add(Pair.of(name, value)); 230 | return this; 231 | } 232 | 233 | /** 234 | * Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that 235 | * is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite 236 | * that query. 237 | * 238 | * If you use this method, or create a builder from a URL whose query is not parseable into query param pairs, you 239 | * cannot subsequently use {@link UrlBuilder#queryParam(String, String)}. See {@link UrlBuilder#fromUrl(URL, 240 | * CharsetDecoder)}. 
241 | * 242 | * @param query Complete URI query, as specified by https://tools.ietf.org/html/rfc3986#section-3.4 243 | * @return this 244 | */ 245 | @Nonnull 246 | public UrlBuilder unstructuredQuery(@Nonnull String query) { 247 | if (!queryParams.isEmpty()) { 248 | throw new IllegalStateException( 249 | "Cannot call unstructuredQuery() when this already has queryParam pairs specified"); 250 | } 251 | 252 | unstructuredQuery = query; 253 | 254 | return this; 255 | } 256 | 257 | /** 258 | * Clear the unstructured query and any query params. 259 | * 260 | * Since the query / query param situation is a little complicated, this method will let you remove all query 261 | * information and start again from scratch. This may be useful when taking an existing url, parsing it into a 262 | * builder, and then re-doing its query params, for instance. 263 | * 264 | * @return this 265 | */ 266 | @Nonnull 267 | public UrlBuilder clearQuery() { 268 | queryParams.clear(); 269 | unstructuredQuery = null; 270 | 271 | return this; 272 | } 273 | 274 | /** 275 | * Add a matrix param to the last added path segment. If no segments have been added, the param will be added to the 276 | * root. Matrix params will be encoded in the order added. 277 | * 278 | * @param name param name 279 | * @param value param value 280 | * @return this 281 | */ 282 | @Nonnull 283 | public UrlBuilder matrixParam(@Nonnull String name, @Nonnull String value) { 284 | if (pathSegments.isEmpty()) { 285 | // create an empty path segment to represent a matrix param applied to the root 286 | pathSegment(""); 287 | } 288 | 289 | PathSegment seg = pathSegments.get(pathSegments.size() - 1); 290 | seg.matrixParams.add(Pair.of(name, value)); 291 | return this; 292 | } 293 | 294 | /** 295 | * Set the fragment. 
296 | * 297 | * @param fragment fragment string 298 | * @return this 299 | */ 300 | @Nonnull 301 | public UrlBuilder fragment(@Nonnull String fragment) { 302 | this.fragment = fragment; 303 | return this; 304 | } 305 | 306 | /** 307 | * Force the generated URL to have a trailing slash at the end of the path. 308 | * 309 | * @return this 310 | */ 311 | @Nonnull 312 | public UrlBuilder forceTrailingSlash() { 313 | forceTrailingSlash = true; 314 | return this; 315 | } 316 | 317 | /** 318 | * Encode the current builder state into a URL string. 319 | * 320 | * @return a well-formed URL string 321 | * @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors 322 | */ 323 | public String toUrlString() throws CharacterCodingException { 324 | StringBuilder buf = new StringBuilder(); 325 | 326 | buf.append(scheme); 327 | buf.append("://"); 328 | 329 | buf.append(encodeHost(host)); 330 | if (port != null) { 331 | buf.append(':'); 332 | buf.append(port); 333 | } 334 | 335 | for (PathSegment pathSegment : pathSegments) { 336 | buf.append('/'); 337 | buf.append(pathEncoder.encode(pathSegment.segment)); 338 | 339 | for (Pair matrixParam : pathSegment.matrixParams) { 340 | buf.append(';'); 341 | buf.append(matrixEncoder.encode(matrixParam.getKey())); 342 | buf.append('='); 343 | buf.append(matrixEncoder.encode(matrixParam.getValue())); 344 | } 345 | } 346 | 347 | if (forceTrailingSlash) { 348 | buf.append('/'); 349 | } 350 | 351 | if (!queryParams.isEmpty()) { 352 | buf.append("?"); 353 | Iterator> qpIter = queryParams.iterator(); 354 | while (qpIter.hasNext()) { 355 | Pair queryParam = qpIter.next(); 356 | buf.append(queryParamEncoder.encode(queryParam.getKey())); 357 | buf.append('='); 358 | buf.append(queryParamEncoder.encode(queryParam.getValue())); 359 | if (qpIter.hasNext()) { 360 | buf.append('&'); 361 | } 362 | } 363 | } else if (unstructuredQuery != null) { 364 | buf.append("?"); 365 | 
buf.append(unstructuredQueryEncoder.encode(unstructuredQuery)); 366 | } 367 | 368 | if (fragment != null) { 369 | buf.append('#'); 370 | buf.append(fragmentEncoder.encode(fragment)); 371 | } 372 | 373 | return buf.toString(); 374 | } 375 | 376 | /** 377 | * Populate a url builder based on the query of a url 378 | * 379 | * @param builder builder 380 | * @param decoder decoder 381 | * @param url url 382 | * @throws CharacterCodingException 383 | */ 384 | private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws 385 | CharacterCodingException { 386 | if (url.getQuery() != null) { 387 | String q = url.getQuery(); 388 | 389 | // try to parse into &-separated key=value pairs 390 | List> pairs = new ArrayList<>(); 391 | boolean parseOk = true; 392 | 393 | for (String queryChunk : q.split("&")) { 394 | String[] queryParamChunks = queryChunk.split("="); 395 | 396 | if (queryParamChunks.length != 2) { 397 | parseOk = false; 398 | break; 399 | } 400 | 401 | pairs.add(Pair.of(decoder.decode(queryParamChunks[0]), 402 | decoder.decode(queryParamChunks[1]))); 403 | } 404 | 405 | if (parseOk) { 406 | for (Pair pair : pairs) { 407 | builder.queryParam(pair.getKey(), pair.getValue()); 408 | } 409 | } else { 410 | builder.unstructuredQuery(decoder.decode(q)); 411 | } 412 | } 413 | } 414 | 415 | /** 416 | * Populate the path segments of a url builder from a url 417 | * 418 | * @param builder builder 419 | * @param decoder decoder 420 | * @param url url 421 | * @throws CharacterCodingException 422 | */ 423 | private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws 424 | CharacterCodingException { 425 | for (String pathChunk : url.getPath().split("/")) { 426 | if (pathChunk.equals("")) { 427 | continue; 428 | } 429 | 430 | if (pathChunk.charAt(0) == ';') { 431 | builder.pathSegment(""); 432 | // empty path segment, but matrix params 433 | for (String matrixChunk : pathChunk.substring(1).split(";")) { 434 | 
buildFromMatrixParamChunk(decoder, builder, matrixChunk); 435 | } 436 | 437 | continue; 438 | } 439 | 440 | // otherwise, path chunk is non empty and does not start with a ';' 441 | 442 | String[] matrixChunks = pathChunk.split(";"); 443 | 444 | // first chunk is always the path segment. If there is a trailing ; and no matrix params, the ; will 445 | // not be included in the final url. 446 | builder.pathSegment(decoder.decode(matrixChunks[0])); 447 | 448 | // if there any other chunks, they're matrix param pairs 449 | for (int i = 1; i < matrixChunks.length; i++) { 450 | buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]); 451 | } 452 | } 453 | } 454 | 455 | private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws 456 | CharacterCodingException { 457 | String[] mtxPair = pathMatrixChunk.split("="); 458 | if (mtxPair.length != 2) { 459 | throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">"); 460 | } 461 | 462 | String mtxName = mtxPair[0]; 463 | String mtxVal = mtxPair[1]; 464 | ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal)); 465 | } 466 | 467 | /** 468 | * @param host original host string 469 | * @return host encoded as in RFC 3986 section 3.2.2 470 | */ 471 | @Nonnull 472 | private String encodeHost(String host) throws CharacterCodingException { 473 | // matching order: IP-literal, IPv4, reg-name 474 | if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) { 475 | return host; 476 | } 477 | 478 | // it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL) 479 | return regNameEncoder.encode(host); 480 | } 481 | 482 | /** 483 | * Bundle of a path segment name and any associated matrix params. 
484 | */ 485 | private static class PathSegment { 486 | private final String segment; 487 | private final List> matrixParams = new ArrayList<>(); 488 | 489 | PathSegment(String segment) { 490 | this.segment = segment; 491 | } 492 | } 493 | 494 | private static class Pair { 495 | 496 | private final K key; 497 | private final V value; 498 | 499 | private Pair(K key, V value) { 500 | this.key = key; 501 | this.value = value; 502 | } 503 | 504 | K getKey() { 505 | return key; 506 | } 507 | 508 | V getValue() { 509 | return value; 510 | } 511 | 512 | static Pair of(K key, V value) { 513 | return new Pair<>(key, value); 514 | } 515 | } 516 | } 517 | -------------------------------------------------------------------------------- /src/main/java/com/palominolabs/http/url/UrlPercentEncoders.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Palomino Labs, Inc. 3 | */ 4 | 5 | package com.palominolabs.http.url; 6 | 7 | import java.util.BitSet; 8 | import javax.annotation.concurrent.ThreadSafe; 9 | 10 | import static java.nio.charset.CodingErrorAction.REPLACE; 11 | import static java.nio.charset.StandardCharsets.UTF_8; 12 | 13 | /** 14 | * See RFC 3986, RFC 1738 and http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding. 15 | */ 16 | @ThreadSafe 17 | public final class UrlPercentEncoders { 18 | 19 | /** 20 | * an encoder for RFC 3986 reg-names 21 | */ 22 | 23 | private static final BitSet REG_NAME_BIT_SET = new BitSet(); 24 | 25 | private static final BitSet PATH_BIT_SET = new BitSet(); 26 | private static final BitSet MATRIX_BIT_SET = new BitSet(); 27 | private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet(); 28 | private static final BitSet QUERY_PARAM_BIT_SET = new BitSet(); 29 | private static final BitSet FRAGMENT_BIT_SET = new BitSet(); 30 | 31 | static { 32 | // RFC 3986 'reg-name'. This is not very aggressive... 
it's quite possible to have DNS-illegal names out of this. 33 | // Regardless, it will at least be URI-compliant even if it's not HTTP URL-compliant. 34 | addUnreserved(REG_NAME_BIT_SET); 35 | addSubdelims(REG_NAME_BIT_SET); 36 | 37 | // Represents RFC 3986 'pchar'. Remove delimiter that starts matrix section. 38 | addPChar(PATH_BIT_SET); 39 | PATH_BIT_SET.clear((int) ';'); 40 | 41 | // Remove delims for HTTP matrix params as per RFC 1738 S3.3. The other reserved chars ('/' and '?') are already excluded. 42 | addPChar(MATRIX_BIT_SET); 43 | MATRIX_BIT_SET.clear((int) ';'); 44 | MATRIX_BIT_SET.clear((int) '='); 45 | 46 | /* 47 | * At this point it represents RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 also 48 | * specifies that "+" can mean space in a query, so we will make sure to say that '+' is not safe to leave as-is 49 | */ 50 | addQuery(UNSTRUCTURED_QUERY_BIT_SET); 51 | UNSTRUCTURED_QUERY_BIT_SET.clear((int) '+'); 52 | 53 | /* 54 | * Create more stringent requirements for HTML4 queries: remove delimiters for HTML query params so that key=value 55 | * pairs can be used. 
56 | */ 57 | QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET); 58 | QUERY_PARAM_BIT_SET.clear((int) '='); 59 | QUERY_PARAM_BIT_SET.clear((int) '&'); 60 | 61 | addFragment(FRAGMENT_BIT_SET); 62 | } 63 | 64 | /** 65 | * @return a PercentEncoder for RFC 3986 'reg-name' characters 66 | */ 67 | public static PercentEncoder getRegNameEncoder() { 68 | return new PercentEncoder(REG_NAME_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE) 69 | .onUnmappableCharacter(REPLACE)); 70 | } 71 | 72 | /** 73 | * @return a PercentEncoder for RFC 3986 'pchar' 74 | */ 75 | public static PercentEncoder getPathEncoder() { 76 | return new PercentEncoder(PATH_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE) 77 | .onUnmappableCharacter(REPLACE)); 78 | } 79 | 80 | /** 81 | * @return a PercentEncoder for RFC 1738 S3.3 matrix params 82 | */ 83 | public static PercentEncoder getMatrixEncoder() { 84 | return new PercentEncoder(MATRIX_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE) 85 | .onUnmappableCharacter(REPLACE)); 86 | } 87 | 88 | /** 89 | * @return a PercentEncoder for RFC 3986 'query'' 90 | */ 91 | public static PercentEncoder getUnstructuredQueryEncoder() { 92 | return new PercentEncoder(UNSTRUCTURED_QUERY_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE) 93 | .onUnmappableCharacter(REPLACE)); 94 | } 95 | 96 | /** 97 | * @return a PercentEncoder for HTML queries 98 | */ 99 | public static PercentEncoder getQueryParamEncoder() { 100 | return new PercentEncoder(QUERY_PARAM_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE) 101 | .onUnmappableCharacter(REPLACE)); 102 | } 103 | 104 | /** 105 | * @return a PercentEncoder for fragments 106 | */ 107 | public static PercentEncoder getFragmentEncoder() { 108 | return new PercentEncoder(FRAGMENT_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE) 109 | .onUnmappableCharacter(REPLACE)); 110 | } 111 | 112 | private UrlPercentEncoders() { 113 | } 114 | 115 | /** 116 | * Add code points for 'fragment' chars 117 | * 118 | * 
@param fragmentBitSet bit set 119 | */ 120 | private static void addFragment(BitSet fragmentBitSet) { 121 | addPChar(fragmentBitSet); 122 | fragmentBitSet.set((int) '/'); 123 | fragmentBitSet.set((int) '?'); 124 | } 125 | 126 | /** 127 | * Add code points for 'query' chars 128 | * 129 | * @param queryBitSet bit set 130 | */ 131 | private static void addQuery(BitSet queryBitSet) { 132 | addPChar(queryBitSet); 133 | queryBitSet.set((int) '/'); 134 | queryBitSet.set((int) '?'); 135 | } 136 | 137 | /** 138 | * Add code points for 'pchar' chars. 139 | * 140 | * @param bs bitset 141 | */ 142 | private static void addPChar(BitSet bs) { 143 | addUnreserved(bs); 144 | addSubdelims(bs); 145 | bs.set((int) ':'); 146 | bs.set((int) '@'); 147 | } 148 | 149 | /** 150 | * Add codepoints for 'unreserved' chars 151 | * 152 | * @param bs bitset to add codepoints to 153 | */ 154 | private static void addUnreserved(BitSet bs) { 155 | 156 | for (int i = 'a'; i <= 'z'; i++) { 157 | bs.set(i); 158 | } 159 | for (int i = 'A'; i <= 'Z'; i++) { 160 | bs.set(i); 161 | } 162 | for (int i = '0'; i <= '9'; i++) { 163 | bs.set(i); 164 | } 165 | bs.set((int) '-'); 166 | bs.set((int) '.'); 167 | bs.set((int) '_'); 168 | bs.set((int) '~'); 169 | } 170 | 171 | /** 172 | * Add codepoints for 'sub-delims' chars 173 | * 174 | * @param bs bitset to add codepoints to 175 | */ 176 | private static void addSubdelims(BitSet bs) { 177 | bs.set((int) '!'); 178 | bs.set((int) '$'); 179 | bs.set((int) '&'); 180 | bs.set((int) '\''); 181 | bs.set((int) '('); 182 | bs.set((int) ')'); 183 | bs.set((int) '*'); 184 | bs.set((int) '+'); 185 | bs.set((int) ','); 186 | bs.set((int) ';'); 187 | bs.set((int) '='); 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /src/test/java/com/palominolabs/http/url/PercentEncoderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Palomino Labs, Inc. 
3 | */ 4 | 5 | package com.palominolabs.http.url; 6 | 7 | import java.nio.charset.CharacterCodingException; 8 | import java.nio.charset.MalformedInputException; 9 | import java.nio.charset.UnmappableCharacterException; 10 | import java.util.BitSet; 11 | import org.junit.jupiter.api.BeforeEach; 12 | import org.junit.jupiter.api.Test; 13 | 14 | import static java.nio.charset.CodingErrorAction.REPLACE; 15 | import static java.nio.charset.StandardCharsets.UTF_16BE; 16 | import static java.nio.charset.StandardCharsets.UTF_8; 17 | import static org.junit.jupiter.api.Assertions.assertEquals; 18 | 19 | public final class PercentEncoderTest { 20 | 21 | private PercentEncoder alnum; 22 | private PercentEncoder alnum16; 23 | 24 | @BeforeEach 25 | public void setUp() { 26 | BitSet bs = new BitSet(); 27 | for (int i = 'a'; i <= 'z'; i++) { 28 | bs.set(i); 29 | } 30 | for (int i = 'A'; i <= 'Z'; i++) { 31 | bs.set(i); 32 | } 33 | for (int i = '0'; i <= '9'; i++) { 34 | bs.set(i); 35 | } 36 | 37 | this.alnum = new PercentEncoder(bs, UTF_8.newEncoder().onMalformedInput(REPLACE) 38 | .onUnmappableCharacter(REPLACE)); 39 | this.alnum16 = new PercentEncoder(bs, UTF_16BE.newEncoder().onMalformedInput(REPLACE) 40 | .onUnmappableCharacter(REPLACE)); 41 | } 42 | 43 | @Test 44 | public void testDoesntEncodeSafe() throws CharacterCodingException { 45 | BitSet set = new BitSet(); 46 | for (int i = 'a'; i <= 'z'; i++) { 47 | set.set(i); 48 | } 49 | 50 | PercentEncoder pe = new PercentEncoder(set, UTF_8.newEncoder().onMalformedInput(REPLACE) 51 | .onUnmappableCharacter(REPLACE)); 52 | assertEquals("abcd%41%42%43%44", pe.encode("abcdABCD")); 53 | } 54 | 55 | @Test 56 | public void testEncodeInBetweenSafe() throws MalformedInputException, UnmappableCharacterException { 57 | assertEquals("abc%20123", alnum.encode("abc 123")); 58 | } 59 | 60 | @Test 61 | public void testSafeInBetweenEncoded() throws MalformedInputException, UnmappableCharacterException { 62 | assertEquals("%20abc%20", 
alnum.encode(" abc ")); 63 | } 64 | 65 | @Test 66 | public void testEncodeUtf8() throws CharacterCodingException { 67 | // 1 UTF-16 char (unicode snowman) 68 | assertEquals("snowman%E2%98%83", alnum.encode("snowman\u2603")); 69 | } 70 | 71 | @Test 72 | public void testEncodeUtf8SurrogatePair() throws CharacterCodingException { 73 | // musical G clef: 1d11e, has to be represented in surrogate pair form 74 | assertEquals("clef%F0%9D%84%9E", alnum.encode("clef\ud834\udd1e")); 75 | } 76 | 77 | @Test 78 | public void testEncodeUtf16() throws CharacterCodingException { 79 | // 1 UTF-16 char (unicode snowman) 80 | assertEquals("snowman%26%03", alnum16.encode("snowman\u2603")); 81 | } 82 | 83 | @Test 84 | public void testUrlEncodedUtf16SurrogatePair() throws CharacterCodingException { 85 | // musical G clef: 1d11e, has to be represented in surrogate pair form 86 | assertEquals("clef%D8%34%DD%1E", alnum16.encode("clef\ud834\udd1e")); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/test/java/com/palominolabs/http/url/UrlBuilderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 Palomino Labs, Inc. 
3 | */ 4 | 5 | package com.palominolabs.http.url; 6 | 7 | import java.net.MalformedURLException; 8 | import java.net.URI; 9 | import java.net.URISyntaxException; 10 | import java.net.URL; 11 | import java.nio.charset.CharacterCodingException; 12 | import org.junit.jupiter.api.Test; 13 | 14 | import static com.palominolabs.http.url.UrlBuilder.forHost; 15 | import static com.palominolabs.http.url.UrlBuilder.fromUrl; 16 | import static org.junit.jupiter.api.Assertions.assertEquals; 17 | import static org.junit.jupiter.api.Assertions.fail; 18 | 19 | public final class UrlBuilderTest { 20 | 21 | @Test 22 | public void testNoUrlParts() throws CharacterCodingException { 23 | assertUrlEquals("http://foo.com", forHost("http", "foo.com").toUrlString()); 24 | } 25 | 26 | @Test 27 | public void testWithPort() throws CharacterCodingException { 28 | assertUrlEquals("http://foo.com:33", forHost("http", "foo.com", 33).toUrlString()); 29 | } 30 | 31 | @Test 32 | public void testSimplePath() throws CharacterCodingException { 33 | UrlBuilder ub = forHost("http", "foo.com"); 34 | ub.pathSegment("seg1").pathSegment("seg2"); 35 | assertUrlEquals("http://foo.com/seg1/seg2", ub.toUrlString()); 36 | } 37 | 38 | @Test 39 | public void testPathWithReserved() throws CharacterCodingException { 40 | // RFC 1738 S3.3 41 | UrlBuilder ub = forHost("http", "foo.com"); 42 | ub.pathSegment("seg/;?ment").pathSegment("seg=&2"); 43 | assertUrlEquals("http://foo.com/seg%2F%3B%3Fment/seg=&2", ub.toUrlString()); 44 | } 45 | 46 | @Test 47 | public void testPathSegments() throws CharacterCodingException { 48 | UrlBuilder ub = forHost("http", "foo.com"); 49 | ub.pathSegments("seg1", "seg2", "seg3"); 50 | assertUrlEquals("http://foo.com/seg1/seg2/seg3", ub.toUrlString()); 51 | } 52 | 53 | @Test 54 | public void testMatrixWithoutPathHasLeadingSlash() throws CharacterCodingException { 55 | UrlBuilder ub = forHost("http", "foo.com"); 56 | ub.matrixParam("foo", "bar"); 57 | 
assertUrlEquals("http://foo.com/;foo=bar", ub.toUrlString()); 58 | } 59 | 60 | @Test 61 | public void testMatrixWithReserved() throws CharacterCodingException { 62 | UrlBuilder ub = forHost("http", "foo.com") 63 | .pathSegment("foo") 64 | .matrixParam("foo", "bar") 65 | .matrixParam("res;=?#/erved", "value") 66 | .pathSegment("baz"); 67 | assertUrlEquals("http://foo.com/foo;foo=bar;res%3B%3D%3F%23%2Ferved=value/baz", ub.toUrlString()); 68 | } 69 | 70 | @Test 71 | public void testUrlEncodedPathSegmentUtf8() throws CharacterCodingException { 72 | // 1 UTF-16 char 73 | UrlBuilder ub = forHost("http", "foo.com"); 74 | ub.pathSegment("snowman").pathSegment("\u2603"); 75 | assertUrlEquals("http://foo.com/snowman/%E2%98%83", ub.toUrlString()); 76 | } 77 | 78 | @Test 79 | public void testUrlEncodedPathSegmentUtf8SurrogatePair() throws CharacterCodingException { 80 | UrlBuilder ub = forHost("http", "foo.com"); 81 | // musical G clef: 1d11e, has to be represented in surrogate pair form 82 | ub.pathSegment("clef").pathSegment("\ud834\udd1e"); 83 | assertUrlEquals("http://foo.com/clef/%F0%9D%84%9E", ub.toUrlString()); 84 | } 85 | 86 | @Test 87 | public void testQueryParamNoPath() throws CharacterCodingException { 88 | UrlBuilder ub = forHost("http", "foo.com"); 89 | ub.queryParam("foo", "bar"); 90 | String s = ub.toUrlString(); 91 | assertUrlEquals("http://foo.com?foo=bar", s); 92 | } 93 | 94 | @Test 95 | public void testQueryParamsDuplicated() throws CharacterCodingException { 96 | UrlBuilder ub = forHost("http", "foo.com"); 97 | ub.queryParam("foo", "bar"); 98 | ub.queryParam("foo", "bar2"); 99 | ub.queryParam("baz", "quux"); 100 | ub.queryParam("baz", "quux2"); 101 | assertUrlEquals("http://foo.com?foo=bar&foo=bar2&baz=quux&baz=quux2", ub.toUrlString()); 102 | } 103 | 104 | @Test 105 | public void testEncodeQueryParams() throws CharacterCodingException { 106 | UrlBuilder ub = forHost("http", "foo.com"); 107 | ub.queryParam("foo", "bar&=#baz"); 108 | ub.queryParam("foo", 
"bar?/2"); 109 | assertUrlEquals("http://foo.com?foo=bar%26%3D%23baz&foo=bar?/2", ub.toUrlString()); 110 | } 111 | 112 | @Test 113 | public void testEncodeQueryParamWithSpaceAndPlus() throws CharacterCodingException { 114 | UrlBuilder ub = forHost("http", "foo.com"); 115 | ub.queryParam("foo", "spa ce"); 116 | ub.queryParam("fo+o", "plus+"); 117 | assertUrlEquals("http://foo.com?foo=spa%20ce&fo%2Bo=plus%2B", ub.toUrlString()); 118 | } 119 | 120 | @Test 121 | public void testPlusInVariousParts() throws CharacterCodingException { 122 | UrlBuilder ub = forHost("http", "foo.com"); 123 | 124 | ub.pathSegment("has+plus") 125 | .matrixParam("plusMtx", "pl+us") 126 | .queryParam("plusQp", "pl+us") 127 | .fragment("plus+frag"); 128 | 129 | assertUrlEquals("http://foo.com/has+plus;plusMtx=pl+us?plusQp=pl%2Bus#plus+frag", ub.toUrlString()); 130 | } 131 | 132 | @Test 133 | public void testFragment() throws CharacterCodingException { 134 | UrlBuilder ub = forHost("http", "foo.com"); 135 | ub.queryParam("foo", "bar"); 136 | ub.fragment("#frag/?"); 137 | assertUrlEquals("http://foo.com?foo=bar#%23frag/?", ub.toUrlString()); 138 | } 139 | 140 | @Test 141 | public void testAllParts() throws CharacterCodingException { 142 | UrlBuilder ub = forHost("https", "foo.bar.com", 3333); 143 | ub.pathSegment("foo"); 144 | ub.pathSegment("bar"); 145 | ub.matrixParam("mtx1", "val1"); 146 | ub.matrixParam("mtx2", "val2"); 147 | ub.queryParam("q1", "v1"); 148 | ub.queryParam("q2", "v2"); 149 | ub.fragment("zomg it's a fragment"); 150 | 151 | assertEquals("https://foo.bar.com:3333/foo/bar;mtx1=val1;mtx2=val2?q1=v1&q2=v2#zomg%20it's%20a%20fragment", 152 | ub.toUrlString()); 153 | } 154 | 155 | @Test 156 | public void testIPv4Literal() throws CharacterCodingException { 157 | UrlBuilder ub = forHost("http", "127.0.0.1"); 158 | assertUrlEquals("http://127.0.0.1", ub.toUrlString()); 159 | } 160 | 161 | @Test 162 | public void testBadIPv4LiteralDoesntChoke() throws CharacterCodingException { 163 | 
UrlBuilder ub = forHost("http", "300.100.50.1"); 164 | assertUrlEquals("http://300.100.50.1", ub.toUrlString()); 165 | } 166 | 167 | @Test 168 | public void testIPv6LiteralLocalhost() throws CharacterCodingException { 169 | UrlBuilder ub = forHost("http", "[::1]"); 170 | assertUrlEquals("http://[::1]", ub.toUrlString()); 171 | } 172 | 173 | @Test 174 | public void testIPv6Literal() throws CharacterCodingException { 175 | UrlBuilder ub = forHost("http", "[2001:db8:85a3::8a2e:370:7334]"); 176 | assertUrlEquals("http://[2001:db8:85a3::8a2e:370:7334]", ub.toUrlString()); 177 | } 178 | 179 | @Test 180 | public void testEncodedRegNameSingleByte() throws CharacterCodingException { 181 | UrlBuilder ub = forHost("http", "host?name;"); 182 | assertUrlEquals("http://host%3Fname;", ub.toUrlString()); 183 | } 184 | 185 | @Test 186 | public void testEncodedRegNameMultiByte() throws CharacterCodingException { 187 | UrlBuilder ub = forHost("http", "snow\u2603man"); 188 | assertUrlEquals("http://snow%E2%98%83man", ub.toUrlString()); 189 | } 190 | 191 | @Test 192 | public void testForceTrailingSlash() throws CharacterCodingException { 193 | UrlBuilder ub = forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c"); 194 | 195 | assertUrlEquals("https://foo.com/a/b/c/", ub.toUrlString()); 196 | } 197 | 198 | @Test 199 | public void testForceTrailingSlashWithQueryParams() throws CharacterCodingException { 200 | UrlBuilder ub = 201 | forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c").queryParam("foo", "bar"); 202 | 203 | assertUrlEquals("https://foo.com/a/b/c/?foo=bar", ub.toUrlString()); 204 | } 205 | 206 | @Test 207 | public void testForceTrailingSlashNoPathSegmentsWithMatrixParams() throws CharacterCodingException { 208 | UrlBuilder ub = forHost("https", "foo.com").forceTrailingSlash().matrixParam("m1", "v1"); 209 | 210 | assertUrlEquals("https://foo.com/;m1=v1/", ub.toUrlString()); 211 | } 212 | 213 | @Test 214 | public void 
testIntermingledMatrixParamsAndPathSegments() throws CharacterCodingException { 215 | 216 | UrlBuilder ub = forHost("http", "foo.com") 217 | .pathSegments("seg1", "seg2") 218 | .matrixParam("m1", "v1") 219 | .pathSegment("seg3") 220 | .matrixParam("m2", "v2"); 221 | 222 | assertUrlEquals("http://foo.com/seg1/seg2;m1=v1/seg3;m2=v2", ub.toUrlString()); 223 | } 224 | 225 | @Test 226 | public void testFromUrlWithEverything() { 227 | String orig = 228 | "https://foo.bar.com:3333/foo/ba%20r;mtx1=val1;mtx2=val%202/seg%203;m2=v2?q1=v1&q2=v%202#zomg%20it's%20a%20fragment"; 229 | assertUrlBuilderRoundtrip(orig); 230 | } 231 | 232 | @Test 233 | public void testFromUrlWithEmptyPath() { 234 | assertUrlBuilderRoundtrip("http://foo.com"); 235 | } 236 | 237 | @Test 238 | public void testFromUrlWithEmptyPathAndSlash() { 239 | assertUrlBuilderRoundtrip("http://foo.com/", "http://foo.com"); 240 | } 241 | 242 | @Test 243 | public void testFromUrlWithPort() { 244 | assertUrlBuilderRoundtrip("http://foo.com:1234"); 245 | } 246 | 247 | @Test 248 | public void testFromUrlWithEmptyPathSegent() { 249 | assertUrlBuilderRoundtrip("http://foo.com/foo//", "http://foo.com/foo"); 250 | } 251 | 252 | @Test 253 | public void testFromUrlWithEncodedHost() { 254 | assertUrlBuilderRoundtrip("http://f%20oo.com/bar"); 255 | } 256 | 257 | @Test 258 | public void testFromUrlWithEncodedPathSegment() { 259 | assertUrlBuilderRoundtrip("http://foo.com/foo/b%20ar"); 260 | } 261 | 262 | @Test 263 | public void testFromUrlWithEncodedMatrixParam() { 264 | assertUrlBuilderRoundtrip("http://foo.com/foo;m1=v1;m%202=v%202"); 265 | } 266 | 267 | @Test 268 | public void testFromUrlWithEncodedQueryParam() { 269 | assertUrlBuilderRoundtrip("http://foo.com/foo?q%201=v%202&q2=v2"); 270 | } 271 | 272 | @Test 273 | public void testFromUrlWithEncodedQueryParamDelimiter() { 274 | assertUrlBuilderRoundtrip("http://foo.com/foo?q1=%3Dv1&%26q2=v2"); 275 | } 276 | 277 | @Test 278 | public void testFromUrlWithEncodedFragment() { 279 
| assertUrlBuilderRoundtrip("http://foo.com/foo#b%20ar"); 280 | } 281 | 282 | @Test 283 | public void testFromUrlWithMalformedMatrixPair() throws MalformedURLException, CharacterCodingException { 284 | try { 285 | fromUrl(new URL("http://foo.com/foo;m1=v1=v2")); 286 | fail(); 287 | } catch (IllegalArgumentException e) { 288 | assertEquals("Malformed matrix param: ", e.getMessage()); 289 | } 290 | } 291 | 292 | @Test 293 | public void testFromUrlWithEmptyPathSegmentWithMatrixParams() { 294 | assertUrlBuilderRoundtrip("http://foo.com/foo/;m1=v1"); 295 | } 296 | 297 | @Test 298 | public void testFromUrlWithEmptyPathWithMatrixParams() { 299 | assertUrlBuilderRoundtrip("http://foo.com/;m1=v1"); 300 | } 301 | 302 | @Test 303 | public void testFromUrlWithEmptyPathWithMultipleMatrixParams() { 304 | assertUrlBuilderRoundtrip("http://foo.com/;m1=v1;m2=v2"); 305 | } 306 | 307 | @Test 308 | public void testFromUrlWithPathSegmentEndingWithSemicolon() { 309 | assertUrlBuilderRoundtrip("http://foo.com/foo;", "http://foo.com/foo"); 310 | } 311 | 312 | @Test 313 | public void testPercentDecodeInvalidPair() throws MalformedURLException, CharacterCodingException { 314 | try { 315 | fromUrl(new URL("http://foo.com/fo%2o")); 316 | fail(); 317 | } catch (IllegalArgumentException e) { 318 | assertEquals("Invalid %-tuple <%2o>", e.getMessage()); 319 | } 320 | } 321 | 322 | @Test 323 | public void testFromUrlMalformedQueryParamMultiValues() { 324 | assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1=v2"); 325 | } 326 | 327 | @Test 328 | public void testFromUrlMalformedQueryParamNoValue() { 329 | assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1&q2"); 330 | } 331 | 332 | @Test 333 | public void testFromUrlUnstructuredQueryWithEscapedChars() { 334 | assertUrlBuilderRoundtrip("http://foo.com/foo?query==&%23"); 335 | } 336 | 337 | @Test 338 | public void testCantUseQueryParamAfterQuery() { 339 | UrlBuilder ub = forHost("http", "foo.com").unstructuredQuery("q"); 340 | 341 | try { 342 | 
ub.queryParam("foo", "bar"); 343 | fail(); 344 | } catch (IllegalStateException e) { 345 | assertEquals("Cannot call queryParam() when this already has an unstructured query specified", 346 | e.getMessage()); 347 | } 348 | } 349 | 350 | @Test 351 | public void testCantUseQueryAfterQueryParam() { 352 | UrlBuilder ub = forHost("http", "foo.com").queryParam("foo", "bar"); 353 | 354 | try { 355 | ub.unstructuredQuery("q"); 356 | 357 | fail(); 358 | } catch (IllegalStateException e) { 359 | assertEquals("Cannot call unstructuredQuery() when this already has queryParam pairs specified", 360 | e.getMessage()); 361 | } 362 | } 363 | 364 | @Test 365 | public void testUnstructuredQueryWithNoSpecialChars() throws CharacterCodingException { 366 | assertUrlEquals("http://foo.com?q", forHost("http", "foo.com").unstructuredQuery("q").toUrlString()); 367 | } 368 | 369 | @Test 370 | public void testUnstructuredQueryWithOkSpecialChars() throws CharacterCodingException { 371 | assertUrlEquals("http://foo.com?q?/&=", forHost("http", "foo.com").unstructuredQuery("q?/&=").toUrlString()); 372 | } 373 | 374 | @Test 375 | public void testUnstructuredQueryWithEscapedSpecialChars() throws CharacterCodingException { 376 | assertUrlEquals("http://foo.com?q%23%2B", forHost("http", "foo.com").unstructuredQuery("q#+").toUrlString()); 377 | } 378 | 379 | @Test 380 | public void testClearQueryRemovesQueryParam() throws CharacterCodingException { 381 | UrlBuilder ub = forHost("http", "host") 382 | .queryParam("foo", "bar") 383 | .clearQuery(); 384 | assertUrlEquals("http://host", ub.toUrlString()); 385 | } 386 | 387 | @Test 388 | public void testClearQueryRemovesUnstructuredQuery() throws CharacterCodingException { 389 | UrlBuilder ub = forHost("http", "host") 390 | .unstructuredQuery("foobar") 391 | .clearQuery(); 392 | assertUrlEquals("http://host", ub.toUrlString()); 393 | } 394 | 395 | @Test 396 | public void testClearQueryAfterQueryParamAllowsQuery() throws CharacterCodingException { 397 | 
UrlBuilder ub = forHost("http", "host") 398 | .queryParam("foo", "bar") 399 | .clearQuery() 400 | .unstructuredQuery("foobar"); 401 | assertUrlEquals("http://host?foobar", ub.toUrlString()); 402 | } 403 | 404 | @Test 405 | public void testClearQueryAfterQueryAllowsQueryParam() throws CharacterCodingException { 406 | UrlBuilder ub = forHost("http", "host") 407 | .unstructuredQuery("foobar") 408 | .clearQuery() 409 | .queryParam("foo", "bar"); 410 | assertUrlEquals("http://host?foo=bar", ub.toUrlString()); 411 | } 412 | 413 | private void assertUrlBuilderRoundtrip(String url) { 414 | assertUrlBuilderRoundtrip(url, url); 415 | } 416 | 417 | /** 418 | * @param origUrl the url that will be used to create a URL 419 | * @param finalUrl the URL string it should end up as 420 | */ 421 | private void assertUrlBuilderRoundtrip(String origUrl, String finalUrl) { 422 | try { 423 | assertUrlEquals(finalUrl, fromUrl(new URL(origUrl)).toUrlString()); 424 | } catch (CharacterCodingException | MalformedURLException e) { 425 | throw new RuntimeException(e); 426 | } 427 | } 428 | 429 | private static void assertUrlEquals(String expected, String actual) { 430 | assertEquals(expected, actual); 431 | try { 432 | assertEquals(expected, new URI(actual).toString()); 433 | } catch (URISyntaxException e) { 434 | throw new RuntimeException(e); 435 | } 436 | try { 437 | assertEquals(expected, new URL(actual).toString()); 438 | } catch (MalformedURLException e) { 439 | throw new RuntimeException(e); 440 | } 441 | } 442 | } 443 | -------------------------------------------------------------------------------- /src/test/kotlin/com/palominolabs/http/url/PercentDecoderTest.kt: -------------------------------------------------------------------------------- 1 | package com.palominolabs.http.url 2 | 3 | import org.junit.jupiter.api.BeforeEach 4 | import org.junit.jupiter.api.Test 5 | import org.junit.jupiter.api.assertThrows 6 | import java.lang.Character.isHighSurrogate 7 | import 
/**
 * Tests for [PercentDecoder] using a UTF-8 charset decoder.
 */
class PercentDecoderTest {
    private lateinit var decoder: PercentDecoder

    @BeforeEach
    fun setUp() {
        decoder = PercentDecoder(UTF_8.newDecoder())
    }

    @Test
    fun testDecodesWithoutPercents() {
        assertEquals("asdf", decoder.decode("asdf"))
    }

    @Test
    fun testDecodeSingleByte() {
        assertEquals("#", decoder.decode("%23"))
    }

    // NOTE(review): the exception type argument was missing from the assertThrows calls below;
    // IllegalArgumentException matches what testRandomStrings catches from decode() - confirm for the
    // incomplete-pair cases.

    @Test
    fun testIncompletePercentPairNoNumbers() {
        val e = assertThrows<IllegalArgumentException> { decoder.decode("%") }
        assertEquals("Could not percent decode <%>: incomplete %-pair at position 0", e.message)
    }

    @Test
    fun testIncompletePercentPairOneNumber() {
        val e = assertThrows<IllegalArgumentException> { decoder.decode("%2") }
        assertEquals("Could not percent decode <%2>: incomplete %-pair at position 0", e.message)
    }

    @Test
    fun testInvalidHex() {
        val e = assertThrows<IllegalArgumentException> { decoder.decode("%xz") }
        assertEquals("Invalid %-tuple <%xz>", e.message)
    }

    /**
     * Encode random strings with the unstructured query encoder and check that decoding gives back the same
     * UTF-8 bytes. The seed is included in every failure message so a failing run can be reproduced.
     */
    @Test
    fun testRandomStrings() {
        val encoder = UrlPercentEncoders.getUnstructuredQueryEncoder()
        val rand = Random()

        // draw the seed from the generator itself, then re-seed with it so it can be reported
        val seed = rand.nextLong()
        rand.setSeed(seed)

        val charBuf = CharArray(2)
        val codePoints = mutableListOf<Int>()
        val buf = StringBuilder()

        repeat(10_000) {
            buf.setLength(0)
            codePoints.clear()

            randString(buf, codePoints, charBuf, rand, 1 + rand.nextInt(1000))

            val origBytes = buf.toString().encodeToByteArray()
            val codePointsHex = codePoints.map { i -> Integer.toHexString(i) }

            val decodedBytes =
                try {
                    decoder.decode(encoder.encode(buf.toString())).encodeToByteArray()
                } catch (e: IllegalArgumentException) {
                    val charHex = buf.toString()
                        .chars()
                        .asSequence()
                        .map { Integer.toHexString(it) }
                        .toList()
                    // braces are required: "$e.message" would print e.toString() followed by ".message"
                    fail("seed: $seed code points: $codePointsHex chars $charHex ${e.message}")
                }

            assertEquals(toHex(origBytes), toHex(decodedBytes), "Seed: $seed Code points: $codePointsHex")
        }
    }

    /**
     * Generate a random string
     * @param buf buffer to write into
     * @param codePoints list that receives every code point appended to buf
     * @param charBuf char buf for temporary char wrangling (size 2)
     * @param rand random source
     * @param length target length in chars; generation stops once buf holds at least this many, so a final
     * surrogate pair can make the result one char longer
     */
    private fun randString(
        buf: StringBuilder,
        codePoints: MutableList<Int>,
        charBuf: CharArray,
        rand: Random,
        length: Int
    ) {
        while (buf.length < length) {
            // pick something in the range of all 17 unicode planes
            val codePoint = rand.nextInt(17 * 65536)
            if (Character.isDefined(codePoint)) {
                val res = Character.toChars(codePoint, charBuf, 0)

                if (res == CODE_POINT_IN_BMP && (isHighSurrogate(charBuf[0]) || isLowSurrogate(charBuf[0]))) {
                    // isDefined is true even if it's a standalone surrogate in the D800-DFFF range, but those are not legal
                    // single unicode code units (that is, a single char)
                    continue
                }

                // whether it's a pair or not, we want the only char (or high surrogate)
                buf.append(charBuf[0])
                codePoints.add(codePoint)
                if (res == CODE_POINT_IN_SUPPLEMENTARY) {
                    // it's a surrogate pair, so we care about the second char
                    buf.append(charBuf[1])
                }
            }
        }
    }
}

/**
 * @param bytes bytes to format
 * @return list of hex strings, one per byte, each byte treated as unsigned
 */
private fun toHex(bytes: ByteArray): List<String> = bytes.map { Integer.toHexString(it.toInt().and(0xFF)) }

/** Number of chars [Character.toChars] writes for a supplementary code point (a surrogate pair). */
private const val CODE_POINT_IN_SUPPLEMENTARY = 2

/** Number of chars [Character.toChars] writes for a code point in the basic multilingual plane. */
private const val CODE_POINT_IN_BMP = 1