├── .gitignore
├── LICENSE
├── README.md
├── RELEASE-NOTES.md
├── build.gradle.kts
├── gradle.properties
├── gradle
└── wrapper
│ ├── gradle-wrapper.jar
│ └── gradle-wrapper.properties
├── gradlew
├── gradlew.bat
├── settings.gradle.kts
└── src
├── jmh
└── java
│ └── com
│ └── palominolabs
│ └── http
│ └── url
│ ├── PercentDecoderBenchmark.java
│ ├── PercentEncoderBenchmark.java
│ ├── URLDecoderBenchmark.java
│ └── URLEncoderBenchmark.java
├── main
└── java
│ └── com
│ └── palominolabs
│ └── http
│ └── url
│ ├── PercentDecoder.java
│ ├── PercentEncoder.java
│ ├── PercentEncoderOutputHandler.java
│ ├── StringBuilderPercentEncoderOutputHandler.java
│ ├── UrlBuilder.java
│ └── UrlPercentEncoders.java
└── test
├── java
└── com
│ └── palominolabs
│ └── http
│ └── url
│ ├── PercentEncoderTest.java
│ └── UrlBuilderTest.java
└── kotlin
└── com
└── palominolabs
└── http
└── url
└── PercentDecoderTest.kt
/.gitignore:
--------------------------------------------------------------------------------
1 | *.iml
2 | *.ipr
3 | *.iws
4 | *.swp
5 | ./out
6 | .DS_Store
7 | .directory
8 | .gradle
9 | .idea
10 | build
11 | target
12 | out
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | # Copyfree Open Innovation License
2 |
3 | This is version 1.0 of the Copyfree Open Innovation License.
4 |
5 | ## Terms and Conditions
6 |
7 | Redistributions, modified or unmodified, in whole or in part, must retain
8 | applicable notices of copyright or other legal privilege, these conditions, and
9 | the following license terms and disclaimer. Subject to these conditions, each
10 | holder of copyright or other legal privileges, author or assembler, and
11 | contributor of this work, henceforth "licensor", hereby grants to any person
12 | who obtains a copy of this work in any form:
13 |
14 | 1. Permission to reproduce, modify, distribute, publish, sell, sublicense, use,
15 | and/or otherwise deal in the licensed material without restriction.
16 |
17 | 2. A perpetual, worldwide, non-exclusive, royalty-free, gratis, irrevocable
18 | patent license to make, have made, provide, transfer, import, use, and/or
19 | otherwise deal in the licensed material without restriction, for any and all
20 | patents held by such licensor and necessarily infringed by the form of the work
21 | upon distribution of that licensor's contribution to the work under the terms
22 | of this license.
23 |
24 | NO WARRANTY OF ANY KIND IS IMPLIED BY, OR SHOULD BE INFERRED FROM, THIS LICENSE
25 | OR THE ACT OF DISTRIBUTION UNDER THE TERMS OF THIS LICENSE, INCLUDING BUT NOT
26 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
27 | AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS, ASSEMBLERS, OR HOLDERS OF
28 | COPYRIGHT OR OTHER LEGAL PRIVILEGE BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER
29 | LIABILITY, WHETHER IN ACTION OF CONTRACT, TORT, OR OTHERWISE ARISING FROM, OUT
30 | OF, OR IN CONNECTION WITH THE WORK OR THE USE OF OR OTHER DEALINGS IN THE WORK.
31 |
32 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Use this library to safely create valid, correctly encoded URL strings with a fluent API.
2 |
3 | # Usage
4 |
5 | Artifacts are released in Maven Central. For gradle, use the `mavenCentral()` repository.
6 |
7 | Add this to the `dependencies` block in your `build.gradle.kts`:
8 |
9 | ```kotlin
10 | implementation("com.palominolabs.http", "url-builder", "VERSION")
11 | ```
12 |
13 | where `VERSION` is the latest released version. If you're using Maven (know that your life could be greatly improved by switching to Gradle), use this dependency block:
14 |
15 | ```xml
16 | <dependency>
17 |     <groupId>com.palominolabs.http</groupId>
18 |     <artifactId>url-builder</artifactId>
19 |     <version>VERSION</version>
20 | </dependency>
21 | ```
22 |
23 | # Example
24 |
25 | ```java
26 | // showcase the different encoding rules used on different URL components
27 | UrlBuilder.forHost("http", "foo.com")
28 | .pathSegment("with spaces")
29 | .pathSegments("path", "with", "varArgs")
30 | .pathSegment("&=?/")
31 | .queryParam("fancy + name", "fancy?=value")
32 | .matrixParam("matrix", "param?")
33 | .fragment("#?=")
34 | .toUrlString()
35 |
36 | // produces:
37 | // http://foo.com/with%20spaces/path/with/varArgs/&=%3F%2F;matrix=param%3F?fancy%20%2B%20name=fancy?%3Dvalue#%23?=
38 | ```
39 |
40 | # Motivation
41 |
42 | See [this blog post](http://blog.palominolabs.com/2013/10/03/creating-urls-correctly-and-safely/) for a thorough explanation.
43 |
44 | Ideally, the Java SDK would provide a good way to build properly encoded URLs. Unfortunately, it does not.
45 |
46 | [`URLEncoder`](http://docs.oracle.com/javase/7/docs/api/java/net/URLEncoder.html) seems like a thing that you want to use, but amazingly enough it actually does HTML form encoding, not URL encoding.
47 |
48 | URL encoding is also not something that can be done once you've formed a complete URL string. If your URL is already correctly encoded, you do not need to do anything. If it is not, it is impossible to parse it into its constituent parts for subsequent encoding. You must construct a url piece by piece, correctly encoding each piece as you go, to end up with a valid URL string. The encoding rules are also different for different parts of the URL (path, query param, etc.)
49 |
50 | Since the URLs that we use in practice for HTTP have somewhat different rules than "generic" URLs, UrlBuilder errs on the side of usefulness for HTTP-specific URLs. Notably, this means that '+' is percent-encoded to avoid being interpreted as a space. Also, in the URL/URI specs, the query string's format is not defined, but in practice it is used to hold `key=value` pairs separated by `&`.
51 |
52 | # Building
53 |
54 | Run `./gradlew build`.
55 |
--------------------------------------------------------------------------------
/RELEASE-NOTES.md:
--------------------------------------------------------------------------------
1 | - 1.1.5
2 | - Build with Java 8 toolchain to resolve [#13](https://github.com/palominolabs/url-builder/issues/13).
3 | - 1.1.4
4 | - More publication-related tinkering
5 | - 1.1.3
6 | - Switch to publishing via Maven Central
7 | - 1.1.2
8 | - Drop commons-lang, guava, and slf4j dependencies
9 | - Require Java 8
10 | - 1.1.1
11 | - Performance tuning
12 | - Allow unstructured HTTP query (not in `key=value&key=value` form)
13 | - Switch to releasing artifacts on Bintray
14 | - 1.1.0
15 | - Expose PercentEncoder as public
16 | - Allow initializing a UrlBuilder from a `java.net.URL`
17 | - 1.0.2
18 | - Java 6 compatible
19 | - 1.0.1
20 | - Matrix params specified per path segment
21 |
--------------------------------------------------------------------------------
/build.gradle.kts:
--------------------------------------------------------------------------------
1 | import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
2 | import java.net.URI
3 | import java.time.Duration
4 |
5 | // Plugins: Java library build, Kotlin JVM compilation, Maven publication and signing, Nexus/Sonatype
6 | // publishing, dependency-version reporting, JMH benchmarks, release automation, and Kotlin linting.
7 | plugins {
8 |     `java-library`
9 |     kotlin("jvm") version "1.7.20"
10 |     id("maven-publish")
11 |     signing
12 |     id("io.github.gradle-nexus.publish-plugin") version "1.1.0"
13 |     id("com.github.ben-manes.versions") version "0.44.0"
14 |     id("me.champeau.gradle.jmh") version "0.5.3"
15 |     id("net.researchgate.release") version "3.0.2"
16 |     id("org.jmailen.kotlinter") version "3.12.0"
17 | }
18 |
19 | java {
20 |     // Produce sources and javadoc jars in addition to the main jar.
21 |     withSourcesJar()
22 |     withJavadocJar()
23 |     // Build with a Java 8 toolchain from the Azul vendor.
24 |     toolchain {
25 |         languageVersion.set(JavaLanguageVersion.of(8))
26 |         vendor.set(JvmVendorSpec.AZUL)
27 |     }
28 | }
29 |
30 | repositories {
31 |     mavenCentral()
32 | }
33 |
34 | // Shared dependency versions, looked up by key in the blocks below.
35 | val deps by extra {
36 |     mapOf(
37 |         "slf4j" to "2.0.5",
38 |         "jmh" to "1.22",
39 |         "junit" to "5.9.1"
40 |     )
41 | }
42 |
43 | dependencies {
44 |     api("com.google.code.findbugs", "jsr305", "3.0.2")
45 |
46 |     testRuntimeOnly("org.slf4j", "slf4j-simple", "${deps["slf4j"]}")
47 |     testRuntimeOnly("org.slf4j", "log4j-over-slf4j", "${deps["slf4j"]}")
48 |     testRuntimeOnly("org.slf4j", "jcl-over-slf4j", "${deps["slf4j"]}")
49 |     testImplementation("org.slf4j", "jul-to-slf4j", "${deps["slf4j"]}")
50 |
51 |     testImplementation("org.junit.jupiter", "junit-jupiter-api", "${deps["junit"]}")
52 |     testRuntimeOnly("org.junit.jupiter", "junit-jupiter-engine", "${deps["junit"]}")
53 |
54 |     testImplementation(kotlin("stdlib-jdk8"))
55 |     testImplementation(kotlin("test-junit5"))
56 |
57 |     jmhImplementation("com.google.guava", "guava", "31.1-jre")
58 | }
59 |
60 | group = "com.palominolabs.http"
61 |
62 | tasks {
63 |     test {
64 |         useJUnitPlatform()
65 |     }
66 |
67 |     // Emit Java 8 bytecode from every Kotlin compile task, in line with the Java 8 toolchain above.
68 |     withType<KotlinCompile> {
69 |         kotlinOptions.jvmTarget = "1.8"
70 |     }
71 | }
72 |
73 | publishing {
74 |     publications {
75 |         register<MavenPublication>("sonatype") {
76 |             from(components["java"])
77 |
78 |             // sonatype required pom elements
79 |             pom {
80 |                 name.set("${project.group}:${project.name}")
81 |                 description.set(name)
82 |                 url.set("https://github.com/palominolabs/url-builder")
83 |                 licenses {
84 |                     license {
85 |                         // NOTE(review): the LICENSE file in this repository says "version 1.0" of this
86 |                         // license, while the name below says 0.4 -- confirm which one is intended.
87 |                         name.set("Copyfree Open Innovation License 0.4")
88 |                         url.set("https://copyfree.org/content/standard/licenses/coil/license.txt")
89 |                     }
90 |                 }
91 |                 developers {
92 |                     developer {
93 |                         id.set("marshallpierce")
94 |                         name.set("Marshall Pierce")
95 |                         email.set("575695+marshallpierce@users.noreply.github.com")
96 |                     }
97 |                 }
98 |                 scm {
99 |                     connection.set("scm:git:https://github.com/palominolabs/url-builder")
100 |                     developerConnection.set("scm:git:ssh://git@github.com:palominolabs/url-builder.git")
101 |                     url.set("https://github.com/palominolabs/url-builder")
102 |                 }
103 |             }
104 |         }
105 |     }
106 |
107 |     // A safe throw-away place to publish to:
108 |     // ./gradlew publishSonatypePublicationToLocalDebugRepository -Pversion=foo
109 |     repositories {
110 |         maven {
111 |             name = "localDebug"
112 |             url = URI.create("file:///${project.buildDir}/repos/localDebug")
113 |         }
114 |     }
115 | }
116 |
117 | jmh {
118 |     jmhVersion = deps["jmh"]
119 | }
120 |
121 | // Make the release build depend on publishToSonatype; the task is looked up lazily through a provider.
122 | tasks.afterReleaseBuild {
123 |     dependsOn(provider { project.tasks.named("publishToSonatype") })
124 | }
125 |
126 | // don't barf for devs without signing set up
127 | if (project.hasProperty("signing.keyId")) {
128 |     signing {
129 |         sign(project.extensions.getByType<PublishingExtension>().publications["sonatype"])
130 |     }
131 | }
132 |
133 | nexusPublishing {
134 |     repositories {
135 |         sonatype {
136 |             // sonatypeUsername and sonatypePassword properties are used automatically
137 |             stagingProfileId.set("26c8b7fff47581") // com.palominolabs
138 |         }
139 |     }
140 |     // these are not strictly required. The default timeouts are set to 1 minute. But Sonatype can be really slow.
141 |     // If you get the error "java.net.SocketTimeoutException: timeout", these lines will help.
142 |     connectTimeout.set(Duration.ofMinutes(3))
143 |     clientTimeout.set(Duration.ofMinutes(3))
144 | }
145 |
146 | // The release plugin requires the master branch.
147 | release {
148 |     git {
149 |         requireBranch.set("master")
150 |     }
151 | }
152 |
--------------------------------------------------------------------------------
/gradle.properties:
--------------------------------------------------------------------------------
1 | version = 1.1.6-SNAPSHOT
2 | kotlin.stdlib.default.dependency=false
3 |
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/palominolabs/url-builder/2643fb6c6e6428c5f0a112196a1c486401e950c3/gradle/wrapper/gradle-wrapper.jar
--------------------------------------------------------------------------------
/gradle/wrapper/gradle-wrapper.properties:
--------------------------------------------------------------------------------
1 | distributionBase=GRADLE_USER_HOME
2 | distributionPath=wrapper/dists
3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.6-bin.zip
4 | networkTimeout=10000
5 | zipStoreBase=GRADLE_USER_HOME
6 | zipStorePath=wrapper/dists
7 |
--------------------------------------------------------------------------------
/gradlew:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | #
4 | # Copyright © 2015-2021 the original authors.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # https://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | #
18 |
19 | ##############################################################################
20 | #
21 | # Gradle start up script for POSIX generated by Gradle.
22 | #
23 | # Important for running:
24 | #
25 | # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
26 | # noncompliant, but you have some other compliant shell such as ksh or
27 | # bash, then to run this script, type that shell name before the whole
28 | # command line, like:
29 | #
30 | # ksh Gradle
31 | #
32 | # Busybox and similar reduced shells will NOT work, because this script
33 | # requires all of these POSIX shell features:
34 | # * functions;
35 | # * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
36 | # «${var#prefix}», «${var%suffix}», and «$( cmd )»;
37 | # * compound commands having a testable exit status, especially «case»;
38 | # * various built-in commands including «command», «set», and «ulimit».
39 | #
40 | # Important for patching:
41 | #
42 | # (2) This script targets any POSIX shell, so it avoids extensions provided
43 | # by Bash, Ksh, etc; in particular arrays are avoided.
44 | #
45 | # The "traditional" practice of packing multiple parameters into a
46 | # space-separated string is a well documented source of bugs and security
47 | # problems, so this is (mostly) avoided, by progressively accumulating
48 | # options in "$@", and eventually passing that to Java.
49 | #
50 | # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
51 | # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
52 | # see the in-line comments for details.
53 | #
54 | # There are tweaks for specific operating systems such as AIX, CygWin,
55 | # Darwin, MinGW, and NonStop.
56 | #
57 | # (3) This script is generated from the Groovy template
58 | # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
59 | # within the Gradle project.
60 | #
61 | # You can find Gradle at https://github.com/gradle/gradle/.
62 | #
63 | ##############################################################################
64 |
65 | # Attempt to set APP_HOME
66 |
67 | # Resolve links: $0 may be a link
68 | app_path=$0
69 |
70 | # Need this for daisy-chained symlinks.
71 | while
72 | APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
73 | [ -h "$app_path" ]
74 | do
75 | ls=$( ls -ld "$app_path" )
76 | link=${ls#*' -> '}
77 | case $link in #(
78 | /*) app_path=$link ;; #(
79 | *) app_path=$APP_HOME$link ;;
80 | esac
81 | done
82 |
83 | # This is normally unused
84 | # shellcheck disable=SC2034
85 | APP_BASE_NAME=${0##*/}
86 | APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
87 |
88 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
89 | DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
90 |
91 | # Use the maximum available, or set MAX_FD != -1 to use that value.
92 | MAX_FD=maximum
93 |
94 | warn () {
95 | echo "$*"
96 | } >&2
97 |
98 | die () {
99 | echo
100 | echo "$*"
101 | echo
102 | exit 1
103 | } >&2
104 |
105 | # OS specific support (must be 'true' or 'false').
106 | cygwin=false
107 | msys=false
108 | darwin=false
109 | nonstop=false
110 | case "$( uname )" in #(
111 | CYGWIN* ) cygwin=true ;; #(
112 | Darwin* ) darwin=true ;; #(
113 | MSYS* | MINGW* ) msys=true ;; #(
114 | NONSTOP* ) nonstop=true ;;
115 | esac
116 |
117 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
118 |
119 |
120 | # Determine the Java command to use to start the JVM.
121 | if [ -n "$JAVA_HOME" ] ; then
122 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
123 | # IBM's JDK on AIX uses strange locations for the executables
124 | JAVACMD=$JAVA_HOME/jre/sh/java
125 | else
126 | JAVACMD=$JAVA_HOME/bin/java
127 | fi
128 | if [ ! -x "$JAVACMD" ] ; then
129 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
130 |
131 | Please set the JAVA_HOME variable in your environment to match the
132 | location of your Java installation."
133 | fi
134 | else
135 | JAVACMD=java
136 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
137 |
138 | Please set the JAVA_HOME variable in your environment to match the
139 | location of your Java installation."
140 | fi
141 |
142 | # Increase the maximum file descriptors if we can.
143 | if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
144 | case $MAX_FD in #(
145 | max*)
146 | # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
147 | # shellcheck disable=SC3045
148 | MAX_FD=$( ulimit -H -n ) ||
149 | warn "Could not query maximum file descriptor limit"
150 | esac
151 | case $MAX_FD in #(
152 | '' | soft) :;; #(
153 | *)
154 | # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
155 | # shellcheck disable=SC3045
156 | ulimit -n "$MAX_FD" ||
157 | warn "Could not set maximum file descriptor limit to $MAX_FD"
158 | esac
159 | fi
160 |
161 | # Collect all arguments for the java command, stacking in reverse order:
162 | # * args from the command line
163 | # * the main class name
164 | # * -classpath
165 | # * -D...appname settings
166 | # * --module-path (only if needed)
167 | # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
168 |
169 | # For Cygwin or MSYS, switch paths to Windows format before running java
170 | if "$cygwin" || "$msys" ; then
171 | APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
172 | CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
173 |
174 | JAVACMD=$( cygpath --unix "$JAVACMD" )
175 |
176 | # Now convert the arguments - kludge to limit ourselves to /bin/sh
177 | for arg do
178 | if
179 | case $arg in #(
180 | -*) false ;; # don't mess with options #(
181 | /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
182 | [ -e "$t" ] ;; #(
183 | *) false ;;
184 | esac
185 | then
186 | arg=$( cygpath --path --ignore --mixed "$arg" )
187 | fi
188 | # Roll the args list around exactly as many times as the number of
189 | # args, so each arg winds up back in the position where it started, but
190 | # possibly modified.
191 | #
192 | # NB: a `for` loop captures its iteration list before it begins, so
193 | # changing the positional parameters here affects neither the number of
194 | # iterations, nor the values presented in `arg`.
195 | shift # remove old arg
196 | set -- "$@" "$arg" # push replacement arg
197 | done
198 | fi
199 |
200 | # Collect all arguments for the java command;
201 | # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
202 | # shell script including quotes and variable substitutions, so put them in
203 | # double quotes to make sure that they get re-expanded; and
204 | # * put everything else in single quotes, so that it's not re-expanded.
205 |
206 | set -- \
207 | "-Dorg.gradle.appname=$APP_BASE_NAME" \
208 | -classpath "$CLASSPATH" \
209 | org.gradle.wrapper.GradleWrapperMain \
210 | "$@"
211 |
212 | # Stop when "xargs" is not available.
213 | if ! command -v xargs >/dev/null 2>&1
214 | then
215 | die "xargs is not available"
216 | fi
217 |
218 | # Use "xargs" to parse quoted args.
219 | #
220 | # With -n1 it outputs one arg per line, with the quotes and backslashes removed.
221 | #
222 | # In Bash we could simply go:
223 | #
224 | # readarray ARGS < <( xargs -n1 <<<"$var" ) &&
225 | # set -- "${ARGS[@]}" "$@"
226 | #
227 | # but POSIX shell has neither arrays nor command substitution, so instead we
228 | # post-process each arg (as a line of input to sed) to backslash-escape any
229 | # character that might be a shell metacharacter, then use eval to reverse
230 | # that process (while maintaining the separation between arguments), and wrap
231 | # the whole thing up as a single "set" statement.
232 | #
233 | # This will of course break if any of these variables contains a newline or
234 | # an unmatched quote.
235 | #
236 |
237 | eval "set -- $(
238 | printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
239 | xargs -n1 |
240 | sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
241 | tr '\n' ' '
242 | )" '"$@"'
243 |
244 | exec "$JAVACMD" "$@"
245 |
--------------------------------------------------------------------------------
/gradlew.bat:
--------------------------------------------------------------------------------
1 | @rem
2 | @rem Copyright 2015 the original author or authors.
3 | @rem
4 | @rem Licensed under the Apache License, Version 2.0 (the "License");
5 | @rem you may not use this file except in compliance with the License.
6 | @rem You may obtain a copy of the License at
7 | @rem
8 | @rem https://www.apache.org/licenses/LICENSE-2.0
9 | @rem
10 | @rem Unless required by applicable law or agreed to in writing, software
11 | @rem distributed under the License is distributed on an "AS IS" BASIS,
12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | @rem See the License for the specific language governing permissions and
14 | @rem limitations under the License.
15 | @rem
16 |
17 | @if "%DEBUG%"=="" @echo off
18 | @rem ##########################################################################
19 | @rem
20 | @rem Gradle startup script for Windows
21 | @rem
22 | @rem ##########################################################################
23 |
24 | @rem Set local scope for the variables with windows NT shell
25 | if "%OS%"=="Windows_NT" setlocal
26 |
27 | set DIRNAME=%~dp0
28 | if "%DIRNAME%"=="" set DIRNAME=.
29 | @rem This is normally unused
30 | set APP_BASE_NAME=%~n0
31 | set APP_HOME=%DIRNAME%
32 |
33 | @rem Resolve any "." and ".." in APP_HOME to make it shorter.
34 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
35 |
36 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
37 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
38 |
39 | @rem Find java.exe
40 | if defined JAVA_HOME goto findJavaFromJavaHome
41 |
42 | set JAVA_EXE=java.exe
43 | %JAVA_EXE% -version >NUL 2>&1
44 | if %ERRORLEVEL% equ 0 goto execute
45 |
46 | echo.
47 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
48 | echo.
49 | echo Please set the JAVA_HOME variable in your environment to match the
50 | echo location of your Java installation.
51 |
52 | goto fail
53 |
54 | :findJavaFromJavaHome
55 | set JAVA_HOME=%JAVA_HOME:"=%
56 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe
57 |
58 | if exist "%JAVA_EXE%" goto execute
59 |
60 | echo.
61 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
62 | echo.
63 | echo Please set the JAVA_HOME variable in your environment to match the
64 | echo location of your Java installation.
65 |
66 | goto fail
67 |
68 | :execute
69 | @rem Setup the command line
70 |
71 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
72 |
73 |
74 | @rem Execute Gradle
75 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
76 |
77 | :end
78 | @rem End local scope for the variables with windows NT shell
79 | if %ERRORLEVEL% equ 0 goto mainEnd
80 |
81 | :fail
82 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83 | rem the _cmd.exe /c_ return code!
84 | set EXIT_CODE=%ERRORLEVEL%
85 | if %EXIT_CODE% equ 0 set EXIT_CODE=1
86 | if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
87 | exit /b %EXIT_CODE%
88 |
89 | :mainEnd
90 | if "%OS%"=="Windows_NT" endlocal
91 |
92 | :omega
93 |
--------------------------------------------------------------------------------
/settings.gradle.kts:
--------------------------------------------------------------------------------
1 | rootProject.name = "url-builder"
2 |
--------------------------------------------------------------------------------
/src/jmh/java/com/palominolabs/http/url/PercentDecoderBenchmark.java:
--------------------------------------------------------------------------------
1 | package com.palominolabs.http.url;
2 |
3 | import java.nio.charset.CharacterCodingException;
4 | import java.nio.charset.StandardCharsets;
5 | import org.openjdk.jmh.annotations.Benchmark;
6 | import org.openjdk.jmh.annotations.Scope;
7 | import org.openjdk.jmh.annotations.State;
8 |
9 | import static com.palominolabs.http.url.PercentEncoderBenchmark.LARGE_STRING_MIX;
10 | import static com.palominolabs.http.url.PercentEncoderBenchmark.SMALL_STRING_MIX;
11 |
12 | /**
13 |  * JMH benchmarks for {@link PercentDecoder}. The inputs are the small and large mixed sample strings from
14 |  * {@link PercentEncoderBenchmark}, percent-encoded once during class initialization.
15 |  */
16 | public class PercentDecoderBenchmark {
17 |
18 |     static final String SMALL_STRING_ENCODED;
19 |     static final String LARGE_STRING_ENCODED;
20 |
21 |     static {
22 |         // one encoder instance produces both encoded inputs
23 |         PercentEncoder queryEncoder = UrlPercentEncoders.getUnstructuredQueryEncoder();
24 |         SMALL_STRING_ENCODED = encodeUnchecked(queryEncoder, SMALL_STRING_MIX);
25 |         LARGE_STRING_ENCODED = encodeUnchecked(queryEncoder, LARGE_STRING_MIX);
26 |     }
27 |
28 |     /**
29 |      * Encodes {@code raw} with {@code percentEncoder}, rethrowing a {@link CharacterCodingException} wrapped
30 |      * in a {@link RuntimeException} so it can be used from the static initializer.
31 |      */
32 |     private static String encodeUnchecked(PercentEncoder percentEncoder, String raw) {
33 |         try {
34 |             return percentEncoder.encode(raw);
35 |         } catch (CharacterCodingException e) {
36 |             throw new RuntimeException(e);
37 |         }
38 |     }
39 |
40 |     /**
41 |      * Thread-scoped benchmark state holding a UTF-8 {@link PercentDecoder}.
42 |      */
43 |     @State(Scope.Thread)
44 |     public static class ThreadState {
45 |         PercentDecoder decoder = new PercentDecoder(StandardCharsets.UTF_8.newDecoder());
46 |     }
47 |
48 |     /** Decodes the small encoded sample. */
49 |     @Benchmark
50 |     public String testPercentDecodeSmall(ThreadState state) throws CharacterCodingException {
51 |         PercentDecoder percentDecoder = state.decoder;
52 |         return percentDecoder.decode(SMALL_STRING_ENCODED);
53 |     }
54 |
55 |     /** Decodes the large encoded sample. */
56 |     @Benchmark
57 |     public String testPercentDecodeLarge(ThreadState state) throws CharacterCodingException {
58 |         PercentDecoder percentDecoder = state.decoder;
59 |         return percentDecoder.decode(LARGE_STRING_ENCODED);
60 |     }
61 | }
46 |
--------------------------------------------------------------------------------
/src/jmh/java/com/palominolabs/http/url/PercentEncoderBenchmark.java:
--------------------------------------------------------------------------------
1 | package com.palominolabs.http.url;
2 |
3 | import com.google.common.base.Strings;
4 | import org.openjdk.jmh.annotations.Benchmark;
5 | import org.openjdk.jmh.annotations.Scope;
6 | import org.openjdk.jmh.annotations.State;
7 |
8 | import java.nio.charset.CharacterCodingException;
9 |
10 | public class PercentEncoderBenchmark {
11 |
12 | // safe and unsafe
13 | static final String TINY_STRING_MIX = "foo bar baz";
14 | static final String SMALL_STRING_MIX = "small value !@#$%^&*()???????????????!@#$%^&*()";
15 | // no characters escaped
16 | static final String SMALL_STRING_ALL_SAFE = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
17 | // all characters escaped
18 | static final String SMALL_STRING_ALL_UNSAFE = "???????????????????????????????????????????????";
19 |
20 | static final String LARGE_STRING_MIX;
21 | static final String LARGE_STRING_ALL_SAFE;
22 | static final String LARGE_STRING_ALL_UNSAFE;
23 |
24 | static {
25 | LARGE_STRING_MIX = Strings.repeat(SMALL_STRING_MIX, 1000);
26 | LARGE_STRING_ALL_SAFE = Strings.repeat(SMALL_STRING_ALL_SAFE, 1000);
27 | LARGE_STRING_ALL_UNSAFE = Strings.repeat(SMALL_STRING_ALL_UNSAFE, 1000);
28 | }
29 |
30 | @State(Scope.Thread)
31 | public static class ThreadState {
32 | PercentEncoder encoder = UrlPercentEncoders.getUnstructuredQueryEncoder();
33 | PercentEncoderOutputHandler noOpHandler = new NoOpOutputHandler();
34 | AccumXorOutputHandler accumXorHandler = new AccumXorOutputHandler();
35 | }
36 |
37 | @Benchmark
38 | public String testPercentEncodeTinyMix(ThreadState state) throws CharacterCodingException {
39 | return state.encoder.encode(TINY_STRING_MIX);
40 | }
41 |
42 | @Benchmark
43 | public String testPercentEncodeSmallMix(ThreadState state) throws CharacterCodingException {
44 | return state.encoder.encode(SMALL_STRING_MIX);
45 | }
46 |
47 | @Benchmark
48 | public String testPercentEncodeLargeMix(ThreadState state) throws CharacterCodingException {
49 | return state.encoder.encode(LARGE_STRING_MIX);
50 | }
51 |
52 | @Benchmark
53 | public String testPercentEncodeSmallSafe(ThreadState state) throws CharacterCodingException {
54 | return state.encoder.encode(SMALL_STRING_ALL_SAFE);
55 | }
56 |
57 | @Benchmark
58 | public String testPercentEncodeLargeSafe(ThreadState state) throws CharacterCodingException {
59 | return state.encoder.encode(LARGE_STRING_ALL_SAFE);
60 | }
61 |
62 | @Benchmark
63 | public String testPercentEncodeSmallUnsafe(ThreadState state) throws CharacterCodingException {
64 | return state.encoder.encode(SMALL_STRING_ALL_UNSAFE);
65 | }
66 |
67 | @Benchmark
68 | public String testPercentEncodeLargeUnsafe(ThreadState state) throws CharacterCodingException {
69 | return state.encoder.encode(LARGE_STRING_ALL_UNSAFE);
70 | }
71 |
72 | @Benchmark
73 | public void testPercentEncodeSmallNoOpMix(ThreadState state) throws CharacterCodingException {
74 | state.encoder.encode(SMALL_STRING_MIX, state.noOpHandler);
75 | }
76 |
77 | @Benchmark
78 | public void testPercentEncodeLargeNoOpMix(ThreadState state) throws CharacterCodingException {
79 | state.encoder.encode(LARGE_STRING_MIX, state.noOpHandler);
80 | }
81 |
82 | @Benchmark
83 | public char testPercentEncodeSmallAccumXorMix(ThreadState state) throws CharacterCodingException {
84 | state.encoder.encode(SMALL_STRING_MIX, state.accumXorHandler);
85 | return state.accumXorHandler.c;
86 | }
87 |
88 | @Benchmark
89 | public char testPercentEncodeLargeAccumXorMix(ThreadState state) throws CharacterCodingException {
90 | state.encoder.encode(LARGE_STRING_MIX, state.accumXorHandler);
91 | return state.accumXorHandler.c;
92 | }
93 |
94 | static class NoOpOutputHandler implements PercentEncoderOutputHandler {
95 |
96 | @Override
97 | public void onOutputChar(char c) {
98 | // no op
99 | }
100 | }
101 |
102 | /**
103 | * A handler that doesn't allocate, but can't be optimized away
104 | */
105 | static class AccumXorOutputHandler implements PercentEncoderOutputHandler {
106 | char c;
107 |
108 | @Override
109 | public void onOutputChar(char c) {
110 | this.c ^= c;
111 | }
112 | }
113 | }
114 |
--------------------------------------------------------------------------------
/src/jmh/java/com/palominolabs/http/url/URLDecoderBenchmark.java:
--------------------------------------------------------------------------------
1 | package com.palominolabs.http.url;
2 |
3 | import org.openjdk.jmh.annotations.Benchmark;
4 |
5 | import java.io.UnsupportedEncodingException;
6 | import java.net.URLDecoder;
7 | import java.nio.charset.CharacterCodingException;
8 |
9 | import static com.palominolabs.http.url.PercentDecoderBenchmark.LARGE_STRING_ENCODED;
10 | import static com.palominolabs.http.url.PercentDecoderBenchmark.SMALL_STRING_ENCODED;
11 |
12 | public class URLDecoderBenchmark {
13 |
14 | @Benchmark
15 | public String testUrlDecodeSmall() throws CharacterCodingException, UnsupportedEncodingException {
16 | return URLDecoder.decode(SMALL_STRING_ENCODED, "UTF-8");
17 | }
18 |
19 | @Benchmark
20 | public String testUrlDecodeLarge() throws CharacterCodingException, UnsupportedEncodingException {
21 | return URLDecoder.decode(LARGE_STRING_ENCODED, "UTF-8");
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/jmh/java/com/palominolabs/http/url/URLEncoderBenchmark.java:
--------------------------------------------------------------------------------
1 | package com.palominolabs.http.url;
2 |
3 | import org.openjdk.jmh.annotations.Benchmark;
4 |
5 | import java.io.UnsupportedEncodingException;
6 | import java.net.URLEncoder;
7 | import java.nio.charset.CharacterCodingException;
8 |
9 | import static com.palominolabs.http.url.PercentEncoderBenchmark.LARGE_STRING_MIX;
10 | import static com.palominolabs.http.url.PercentEncoderBenchmark.SMALL_STRING_MIX;
11 |
12 | public class URLEncoderBenchmark { // benchmarks java.net.URLEncoder on the mixed inputs of PercentEncoderBenchmark
13 |
14 | @Benchmark
15 | public String testUrlEncodeSmall() throws CharacterCodingException, UnsupportedEncodingException {
16 | return URLEncoder.encode(SMALL_STRING_MIX, "UTF-8"); // NOTE(review): CharacterCodingException looks unneeded here
17 | }
18 |
19 | @Benchmark
20 | public String testUrlEncodeLarge() throws CharacterCodingException, UnsupportedEncodingException {
21 | return URLEncoder.encode(LARGE_STRING_MIX, "UTF-8"); // same call as the small case, on the large input
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/com/palominolabs/http/url/PercentDecoder.java:
--------------------------------------------------------------------------------
1 | package com.palominolabs.http.url;
2 |
3 | import javax.annotation.Nonnull;
4 | import javax.annotation.concurrent.NotThreadSafe;
5 | import java.nio.ByteBuffer;
6 | import java.nio.CharBuffer;
7 | import java.nio.charset.CharsetDecoder;
8 | import java.nio.charset.CoderResult;
9 | import java.nio.charset.MalformedInputException;
10 | import java.nio.charset.UnmappableCharacterException;
11 |
12 | import static java.nio.charset.CoderResult.OVERFLOW;
13 | import static java.nio.charset.CoderResult.UNDERFLOW;
14 |
15 | /**
16 | * Decodes percent-encoded (%XX) Unicode text.
17 | */
18 | @NotThreadSafe
19 | public final class PercentDecoder {
20 |
21 | /**
22 | * Bytes represented by the current run of consecutive %-triples. Replaced by a larger buffer when full.
23 | */
24 | private ByteBuffer encodedBuf;
25 |
26 | /**
27 | * Written to with decoded chars by decoder
28 | */
29 | private final CharBuffer decodedCharBuf;
30 | private final CharsetDecoder decoder;
31 |
32 | /**
33 | * The decoded string for the current input
34 | */
35 | private final StringBuilder outputBuf = new StringBuilder();
36 |
37 | /**
38 | * Construct a new PercentDecoder with default buffer sizes.
39 | *
40 | * @param charsetDecoder Charset to decode bytes into chars with
41 | * @see PercentDecoder#PercentDecoder(CharsetDecoder, int, int)
42 | */
43 | public PercentDecoder(@Nonnull CharsetDecoder charsetDecoder) {
44 | this(charsetDecoder, 16, 16);
45 | }
46 |
47 | /**
48 | * @param charsetDecoder Charset to decode bytes into chars with
49 | * @param initialEncodedByteBufSize Initial size of buffer that holds encoded bytes (the buffer is grown on demand)
50 | * @param decodedCharBufSize Size of buffer that encoded bytes are decoded into (NOTE(review): presumably needs >= 2)
51 | */
52 | public PercentDecoder(@Nonnull CharsetDecoder charsetDecoder, int initialEncodedByteBufSize,
53 | int decodedCharBufSize) {
54 | encodedBuf = ByteBuffer.allocate(initialEncodedByteBufSize);
55 | decodedCharBuf = CharBuffer.allocate(decodedCharBufSize);
56 | decoder = charsetDecoder;
57 | }
58 |
59 | /**
60 | * @param input Input with %-encoded representation of characters in this instance's configured character set, e.g.
61 | * "%20" for a space character
62 | * @return Corresponding string with %-encoded data decoded and converted to their corresponding characters
63 | * @throws MalformedInputException if decoder is configured to report errors and malformed input is detected
64 | * @throws UnmappableCharacterException if decoder is configured to report errors and an unmappable character is detected
65 | * @throws IllegalArgumentException if a '%' is not followed by two hex digits
66 | */
67 | @Nonnull
68 | public String decode(@Nonnull CharSequence input) throws MalformedInputException, UnmappableCharacterException {
69 | outputBuf.setLength(0);
70 | // this is almost always an underestimate of the size needed:
71 | // only a 4-byte encoding (which is 12 characters input) would cause this to be an overestimate
72 | outputBuf.ensureCapacity(input.length() / 8);
73 | encodedBuf.clear();
74 |
75 | for (int i = 0; i < input.length(); i++) {
76 | char c = input.charAt(i);
77 | if (c != '%') {
78 | handleEncodedBytes();
79 |
80 | outputBuf.append(c);
81 | continue;
82 | }
83 |
84 | if (i + 2 >= input.length()) {
85 | throw new IllegalArgumentException(
86 | "Could not percent decode <" + input + ">: incomplete %-pair at position " + i);
87 | }
88 |
89 | // grow the byte buf if needed; max(1, ...) lets a zero-capacity buffer grow too, since doubling 0 stays 0
90 | if (encodedBuf.remaining() == 0) {
91 | ByteBuffer largerBuf = ByteBuffer.allocate(Math.max(1, encodedBuf.capacity() * 2));
92 | encodedBuf.flip();
93 | largerBuf.put(encodedBuf);
94 | encodedBuf = largerBuf;
95 | }
96 |
97 | // note that we advance i here as we consume chars
98 | int msBits = Character.digit(input.charAt(++i), 16);
99 | int lsBits = Character.digit(input.charAt(++i), 16);
100 |
101 | if (msBits == -1 || lsBits == -1) {
102 | throw new IllegalArgumentException("Invalid %-tuple <" + input.subSequence(i - 2, i + 1) + ">");
103 | }
104 |
105 | msBits <<= 4;
106 | msBits |= lsBits;
107 |
108 | // msBits can only have 8 bits set, so cast is safe
109 | encodedBuf.put((byte) msBits);
110 | }
111 |
112 | handleEncodedBytes();
113 |
114 | return outputBuf.toString();
115 | }
116 |
117 | /**
118 | * Decode any buffered encoded bytes and write them to the output buf.
119 | */
120 | private void handleEncodedBytes() throws MalformedInputException, UnmappableCharacterException {
121 | if (encodedBuf.position() == 0) {
122 | // nothing to do
123 | return;
124 | }
125 |
126 | decoder.reset();
127 | CoderResult coderResult;
128 |
129 | // switch to reading mode
130 | encodedBuf.flip();
131 |
132 | // keep draining while the decoded char buf fills up (OVERFLOW) and there are still encoded bytes left
133 | // decode() in practice seems to only consume bytes when it can decode an entire char...
134 | do {
135 | decodedCharBuf.clear();
136 | coderResult = decoder.decode(encodedBuf, decodedCharBuf, false);
137 | throwIfError(coderResult);
138 | appendDecodedChars();
139 | } while (coderResult == OVERFLOW && encodedBuf.hasRemaining());
140 |
141 | // final decode with end-of-input flag
142 | decodedCharBuf.clear();
143 | coderResult = decoder.decode(encodedBuf, decodedCharBuf, true);
144 | throwIfError(coderResult);
145 |
146 | if (encodedBuf.hasRemaining()) {
147 | throw new IllegalStateException("Final decode didn't error, but didn't consume remaining input bytes");
148 | }
149 | if (coderResult != UNDERFLOW) {
150 | throw new IllegalStateException("Expected underflow, but instead final decode returned " + coderResult);
151 | }
152 |
153 | appendDecodedChars();
154 |
155 | // we've finished the input, wrap it up
156 | encodedBuf.clear();
157 | flush();
158 | }
159 |
160 | /**
161 | * Must only be called when the input encoded bytes buffer is empty
162 | */
163 | private void flush() throws MalformedInputException, UnmappableCharacterException {
164 | CoderResult coderResult;
165 | decodedCharBuf.clear();
166 |
167 | coderResult = decoder.flush(decodedCharBuf);
168 | appendDecodedChars();
169 |
170 | throwIfError(coderResult);
171 |
172 | if (coderResult != UNDERFLOW) {
173 | throw new IllegalStateException("Decoder flush resulted in " + coderResult);
174 | }
175 | }
176 |
177 | /**
178 | * If coderResult is considered an error (i.e. not overflow or underflow), throw the corresponding
179 | * CharacterCodingException.
180 | *
181 | * @param coderResult result to check
182 | * @throws MalformedInputException if result represents malformed input
183 | * @throws UnmappableCharacterException if result represents an unmappable character
184 | */
185 | private void throwIfError(CoderResult coderResult) throws MalformedInputException, UnmappableCharacterException {
186 | if (coderResult.isMalformed()) {
187 | throw new MalformedInputException(coderResult.length());
188 | }
189 | if (coderResult.isUnmappable()) {
190 | throw new UnmappableCharacterException(coderResult.length());
191 | } }
192 |
193 | /**
194 | * Flip the decoded char buf and append it to the string buf
195 | */
196 | private void appendDecodedChars() {
197 | decodedCharBuf.flip();
198 | outputBuf.append(decodedCharBuf);
199 | }
200 | }
201 |
--------------------------------------------------------------------------------
/src/main/java/com/palominolabs/http/url/PercentEncoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Palomino Labs, Inc.
3 | */
4 |
5 | package com.palominolabs.http.url;
6 |
7 | import javax.annotation.Nonnull;
8 | import javax.annotation.concurrent.NotThreadSafe;
9 | import java.nio.ByteBuffer;
10 | import java.nio.CharBuffer;
11 | import java.nio.charset.CharsetEncoder;
12 | import java.nio.charset.CoderResult;
13 | import java.nio.charset.MalformedInputException;
14 | import java.nio.charset.UnmappableCharacterException;
15 | import java.util.BitSet;
16 |
17 | import static java.lang.Character.isHighSurrogate;
18 | import static java.lang.Character.isLowSurrogate;
19 |
20 | /**
21 | * Percent-encoder: every char outside a configured safe set is replaced by the %XX hex form of its encoded bytes.
22 | *
23 | * The usual application is encoding the components of a URL. Ready-made instances for those components are
24 | * available from {@link UrlPercentEncoders}.
25 | */
26 | @NotThreadSafe
27 | public final class PercentEncoder {
28 |
29 | private static final char[] HEX_CODE = "0123456789ABCDEF".toCharArray();
30 |
31 | private final BitSet safeChars;
32 | private final CharsetEncoder encoder;
33 | /**
34 | * Reused by {@link #encode(CharSequence)} so the common encode-to-String case needs no new handler per call
35 | */
36 | private final StringBuilderPercentEncoderOutputHandler stringHandler = new StringBuilderPercentEncoderOutputHandler();
37 | private final ByteBuffer encodedBytes;
38 | private final CharBuffer unsafeCharsToEncode;
39 |
40 | /**
41 | * @param safeChars chars that are passed through unencoded, given as a bitset in which the bit at each such
42 | * char's int value is set. Treated as read only.
43 | * @param charsetEncoder encoder that turns unsafe chars into the bytes to percent-encode. CharsetEncoder instances
44 | * must not be shared across threads.
45 | */
46 | public PercentEncoder(@Nonnull BitSet safeChars, @Nonnull CharsetEncoder charsetEncoder) {
47 | this.safeChars = safeChars;
48 | this.encoder = charsetEncoder;
49 |
50 | // maxBytesPerChar() is a float: truncate it, then add one so the estimate can only err on the large side
51 | int bytesPerChar = (int) charsetEncoder.maxBytesPerChar() + 1;
52 | // a surrogate pair is encoded in one go, so leave room for two chars' worth of bytes
53 | encodedBytes = ByteBuffer.allocate(2 * bytesPerChar);
54 | unsafeCharsToEncode = CharBuffer.allocate(2);
55 | }
56 |
57 | /**
58 | * Encode the input, handing each output char to the supplied handler as it is produced.
59 | *
60 | * @param input text to encode
61 | * @param handler receives every output character, in order
62 | * @throws MalformedInputException if encoder is configured to report errors and malformed input is detected
63 | * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
64 | * detected
65 | */
66 | public void encode(@Nonnull CharSequence input, @Nonnull PercentEncoderOutputHandler handler) throws
67 | MalformedInputException, UnmappableCharacterException {
68 |
69 | for (int i = 0; i < input.length(); i++) {
70 |
71 | char c = input.charAt(i);
72 |
73 | if (safeChars.get(c)) {
74 | handler.onOutputChar(c);
75 | continue;
76 | }
77 |
78 | // unsafe char: stage it for byte encoding
79 | unsafeCharsToEncode.clear();
80 | unsafeCharsToEncode.append(c);
81 | if (isHighSurrogate(c)) {
82 | // a high surrogate at the very end of the input has nothing to pair with
83 | if (i + 1 >= input.length()) {
84 | throw new IllegalArgumentException(
85 | "Invalid UTF-16: The last character in the input string was a high surrogate (\\u" + Integer
86 | .toHexString(c) + ")");
87 | }
88 |
89 | // otherwise the next char must be the matching low surrogate; the two are encoded together
90 | char next = input.charAt(i + 1);
91 | if (!isLowSurrogate(next)) {
92 | throw new IllegalArgumentException(
93 | "Invalid UTF-16: Char " + (i) + " is a high surrogate (\\u" + Integer
94 | .toHexString(c) + "), but char " + (i + 1) + " is not a low surrogate (\\u" + Integer
95 | .toHexString(next) + ")");
96 | }
97 | unsafeCharsToEncode.append(next);
98 | i++;
99 | }
100 |
101 | flushUnsafeCharBuffer(handler);
102 | }
103 | }
104 |
105 | /**
106 | * Encode the input and collect the output into a String.
107 | *
108 | * @param input text to encode
109 | * @return the input with each char that is not in safeChars converted to bytes by this instance's encoder and
110 | * those bytes written out in percent-encoded form
111 | * @throws MalformedInputException if encoder is configured to report errors and malformed input is detected
112 | * @throws UnmappableCharacterException if encoder is configured to report errors and an unmappable character is
113 | * detected
114 | */
115 | @Nonnull
116 | public String encode(@Nonnull CharSequence input) throws MalformedInputException, UnmappableCharacterException {
117 | stringHandler.reset();
118 | stringHandler.ensureCapacity(input.length());
119 | encode(input, stringHandler);
120 | return stringHandler.getContents();
121 | }
122 |
123 | /**
124 | * Turn the staged chars in unsafeCharsToEncode into bytes using the encoder, then emit each byte as a %XX triple.
125 | *
126 | * Side effects: unsafeCharsToEncode is flipped and read. encodedBytes is cleared, written to, then read.
127 | *
128 | * @param handler receives the percent-encoded form of the contents of unsafeCharsToEncode
129 | */
130 | private void flushUnsafeCharBuffer(PercentEncoderOutputHandler handler) throws MalformedInputException,
131 | UnmappableCharacterException {
132 | // the char buffer was just written to; flip it so it can be read from
133 | unsafeCharsToEncode.flip();
134 |
135 | encodedBytes.clear();
136 |
137 | // all chars to encode are already staged, so this is a single encode call with endOfInput set
138 | encoder.reset();
139 | checkResult(encoder.encode(unsafeCharsToEncode, encodedBytes, true));
140 | // then give the encoder its chance to write any final bytes
141 | checkResult(encoder.flush(encodedBytes));
142 |
143 | // switch the byte buffer over to reading
144 | encodedBytes.flip();
145 |
146 | while (encodedBytes.hasRemaining()) {
147 | int unsigned = encodedBytes.get() & 0xFF;
148 |
149 | handler.onOutputChar('%');
150 | handler.onOutputChar(HEX_CODE[unsigned >>> 4]);
151 | handler.onOutputChar(HEX_CODE[unsigned & 0xF]);
152 | }
153 | }
154 |
155 | /**
156 | * @param result coder result to inspect
157 | * @throws IllegalStateException if result is overflow
158 | * @throws MalformedInputException if result represents malformed input
159 | * @throws UnmappableCharacterException if result represents an unmappable character
160 | */
161 | private static void checkResult(CoderResult result) throws MalformedInputException, UnmappableCharacterException {
162 | // a result is of exactly one kind, so the checks are mutually exclusive
163 | if (result.isOverflow()) {
164 | throw new IllegalStateException("Byte buffer overflow; this should not happen.");
165 | } else if (result.isMalformed()) {
166 | throw new MalformedInputException(result.length());
167 | } else if (result.isUnmappable()) {
168 | throw new UnmappableCharacterException(result.length());
169 | }
170 | // anything else (underflow) is not an error
171 | }
172 | }
173 |
--------------------------------------------------------------------------------
/src/main/java/com/palominolabs/http/url/PercentEncoderOutputHandler.java:
--------------------------------------------------------------------------------
1 | package com.palominolabs.http.url;
2 |
3 | import javax.annotation.concurrent.NotThreadSafe;
4 |
5 | /**
6 | * Callback that receives, one char at a time, the output produced during percent encoding.
7 | */
8 | @NotThreadSafe
9 | public interface PercentEncoderOutputHandler {
10 | /**
11 | * Called once for each character output by a PercentEncoder.
12 | *
13 | * @param c output character that's either in the calling PercentEncoder's safe char set or part of a
14 | * percent-hex-encoded triple, e.g. "%FF".
15 | */
16 | void onOutputChar(char c);
17 | }
18 |
--------------------------------------------------------------------------------
/src/main/java/com/palominolabs/http/url/StringBuilderPercentEncoderOutputHandler.java:
--------------------------------------------------------------------------------
1 | package com.palominolabs.http.url;
2 |
3 | import javax.annotation.Nonnull;
4 | import javax.annotation.concurrent.NotThreadSafe;
5 |
6 | /**
7 | * A {@link PercentEncoderOutputHandler} that collects every output char in a StringBuilder.
8 | */
9 | @NotThreadSafe
10 | public final class StringBuilderPercentEncoderOutputHandler implements PercentEncoderOutputHandler {
11 |
12 | private final StringBuilder buffer;
13 |
14 | /**
15 | * Create a handler backed by a StringBuilder of default capacity.
16 | */
17 | public StringBuilderPercentEncoderOutputHandler() {
18 | this.buffer = new StringBuilder();
19 | }
20 |
21 | /**
22 | * @return the chars received since the most recent reset(), as a String
23 | */
24 | @Nonnull
25 | public String getContents() {
26 | return this.buffer.toString();
27 | }
28 |
29 | /**
30 | * Discard everything accumulated so far.
31 | */
32 | public void reset() {
33 | this.buffer.setLength(0);
34 | }
35 |
36 | /**
37 | * Make sure the backing StringBuilder can hold at least the given number of chars.
38 | *
39 | * @param length minimum capacity to ensure
40 | */
41 | public void ensureCapacity(int length) {
42 | this.buffer.ensureCapacity(length);
43 | }
44 |
45 | @Override
46 | public void onOutputChar(char c) {
47 | this.buffer.append(c);
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/com/palominolabs/http/url/UrlBuilder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Palomino Labs, Inc.
3 | */
4 |
5 | package com.palominolabs.http.url;
6 |
7 | import java.net.URL;
8 | import java.nio.charset.CharacterCodingException;
9 | import java.nio.charset.CharsetDecoder;
10 | import java.util.ArrayList;
11 | import java.util.Iterator;
12 | import java.util.List;
13 | import java.util.regex.Pattern;
14 | import javax.annotation.Nonnull;
15 | import javax.annotation.Nullable;
16 | import javax.annotation.concurrent.NotThreadSafe;
17 |
18 | import static com.palominolabs.http.url.UrlPercentEncoders.getFragmentEncoder;
19 | import static com.palominolabs.http.url.UrlPercentEncoders.getMatrixEncoder;
20 | import static com.palominolabs.http.url.UrlPercentEncoders.getPathEncoder;
21 | import static com.palominolabs.http.url.UrlPercentEncoders.getQueryParamEncoder;
22 | import static com.palominolabs.http.url.UrlPercentEncoders.getRegNameEncoder;
23 | import static com.palominolabs.http.url.UrlPercentEncoders.getUnstructuredQueryEncoder;
24 | import static java.nio.charset.StandardCharsets.UTF_8;
25 |
26 | /**
27 | * Builder for urls with url-encoding applied to path, query param, etc.
28 | *
29 | * Escaping rules are from RFC 3986, RFC 1738 and the HTML 4 spec (http://www.w3.org/TR/html401/interact/forms.html#form-content-type).
30 | * This means that this diverges from the canonical URI/URL rules for the sake of being what you want to actually make
31 | * HTTP-useful URLs.
32 | */
33 | @NotThreadSafe
34 | public final class UrlBuilder {
35 |
36 | /**
37 | * IPv6 address, cribbed from http://stackoverflow.com/questions/46146/what-are-the-java-regular-expressions-for-matching-ipv4-and-ipv6-strings
38 | */
39 | private static final Pattern IPV6_PATTERN = Pattern
40 | .compile(
41 | "\\A\\[((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)::((?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)]\\z");
42 |
43 | /**
44 | * IPv4 dotted quad
45 | */
46 | private static final Pattern IPV4_PATTERN = Pattern
47 | .compile("\\A(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}\\z");
48 |
49 | @Nonnull
50 | private final String scheme;
51 |
52 | @Nonnull
53 | private final String host;
54 |
55 | @Nullable
56 | private final Integer port;
57 |
58 | private final List> queryParams = new ArrayList<>();
59 |
60 | /**
61 | * If this is non-null, queryParams must be empty, and vice versa.
62 | */
63 | @Nullable
64 | private String unstructuredQuery;
65 |
66 | private final List<PathSegment> pathSegments = new ArrayList<>(); // path segments, kept in the order added
67 |
68 | private final PercentEncoder pathEncoder = getPathEncoder();
69 | private final PercentEncoder regNameEncoder = getRegNameEncoder();
70 | private final PercentEncoder matrixEncoder = getMatrixEncoder();
71 | private final PercentEncoder queryParamEncoder = getQueryParamEncoder();
72 | private final PercentEncoder unstructuredQueryEncoder = getUnstructuredQueryEncoder();
73 | private final PercentEncoder fragmentEncoder = getFragmentEncoder();
74 |
75 | @Nullable
76 | private String fragment;
77 |
78 | private boolean forceTrailingSlash = false;
79 |
80 | /**
81 | * Create a builder for a URL with UTF-8 encoding; the factory methods of this class delegate here.
82 | *
83 | * @param scheme scheme (e.g. http)
84 | * @param host host (e.g. foo.com or 1.2.3.4 or [::1])
85 | * @param port null (no port is written to the url) or a positive integer
86 | */
87 | private UrlBuilder(@Nonnull String scheme, @Nonnull String host, @Nullable Integer port) {
88 | this.host = host;
89 | this.scheme = scheme;
90 | this.port = port;
91 | }
92 |
93 | /**
94 | * Create a URL with a null port and UTF-8 encoding.
95 | *
96 | * @param scheme scheme (e.g. http)
97 | * @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
98 | * ([::1]), excluding IPvFuture since no one uses that in practice
99 | * @return a url builder
100 | * @see UrlBuilder#forHost(String scheme, String host, int port)
101 | */
102 | public static UrlBuilder forHost(@Nonnull String scheme, @Nonnull String host) {
103 | return new UrlBuilder(scheme, host, null);
104 | }
105 |
106 | /** Create a URL with the given port and UTF-8 encoding.
107 | * @param scheme scheme (e.g. http)
108 | * @param host host in any of the valid syntaxes: reg-name (a dns name), ipv4 literal (1.2.3.4), ipv6 literal
109 | * ([::1]), excluding IPvFuture since no one uses that in practice
110 | * @param port port
111 | * @return a url builder
112 | */
113 | public static UrlBuilder forHost(@Nonnull String scheme, @Nonnull String host, int port) {
114 | return new UrlBuilder(scheme, host, port);
115 | }
116 |
117 | /**
118 | * Convenience overload: calls {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} with a newly created UTF-8
119 | * CharsetDecoder. The same semantics about the query string apply.
120 | *
121 | * @param url url to initialize builder with
122 | * @return a UrlBuilder containing the host, path, etc. from the url
123 | * @throws CharacterCodingException if decoding percent-encoded bytes fails
124 | * @see UrlBuilder#fromUrl(URL, CharsetDecoder)
125 | */
126 | @Nonnull
127 | public static UrlBuilder fromUrl(@Nonnull URL url) throws CharacterCodingException {
128 | return fromUrl(url, UTF_8.newDecoder());
129 | }
130 |
131 | /**
132 | * Create a UrlBuilder initialized with the contents of a {@link URL}.
133 | *
134 | * The query string will be parsed into HTML4 query params if it can be separated into a
135 | * <code>&amp;</code>-separated sequence of <code>key=value</code> pairs. The sequence of query params can then be
136 | * appended to by continuing to call {@link UrlBuilder#queryParam(String, String)}. The concept of query params is
137 | * only part of the HTML spec (and common HTTP usage), though, so it's perfectly legal to have a query string that
138 | * is in some other form. To represent this case, if the aforementioned param-parsing attempt fails, the query
139 | * string will be treated as just a monolithic, unstructured, string. In this case, calls to {@link
140 | * UrlBuilder#queryParam(String, String)} on the resulting instance will throw IllegalStateException, and only calls
141 | * to {@link UrlBuilder#unstructuredQuery(String)}, which replaces the entire query string, are allowed.
142 | *
143 | * @param url url to initialize builder with
144 | * @param charsetDecoder the decoder to decode encoded bytes with (except for reg names, which are always UTF-8)
145 | * @return a UrlBuilder containing the host, path, etc. from the url
146 | * @throws CharacterCodingException if decoding percent-encoded bytes fails and charsetDecoder is configured to
147 | * report errors
148 | * @see UrlBuilder#fromUrl(URL)
149 | */
150 | @Nonnull
151 | public static UrlBuilder fromUrl(@Nonnull URL url, @Nonnull CharsetDecoder charsetDecoder) throws
152 | CharacterCodingException {
153 |
154 | PercentDecoder decoder = new PercentDecoder(charsetDecoder);
155 | // reg names must be encoded UTF-8
156 | PercentDecoder regNameDecoder;
157 | if (charsetDecoder.charset().equals(UTF_8)) {
158 | regNameDecoder = decoder;
159 | } else {
160 | regNameDecoder = new PercentDecoder(UTF_8.newDecoder());
161 | }
162 |
163 | Integer port = url.getPort();
164 | if (port == -1) {
165 | port = null;
166 | }
167 |
168 | UrlBuilder builder = new UrlBuilder(url.getProtocol(), regNameDecoder.decode(url.getHost()), port);
169 |
170 | buildFromPath(builder, decoder, url);
171 |
172 | buildFromQuery(builder, decoder, url);
173 |
174 | if (url.getRef() != null) {
175 | builder.fragment(decoder.decode(url.getRef()));
176 | }
177 |
178 | return builder;
179 | }
180 |
181 | /**
182 | * Append a path segment. Segments are written in the order added, each one preceded by a '/'.
183 | *
184 | * @param segment a path segment; it is percent-encoded when the url string is generated
185 | * @return this
186 | */
187 | @Nonnull
188 | public UrlBuilder pathSegment(@Nonnull String segment) {
189 | pathSegments.add(new PathSegment(segment));
190 | return this;
191 | }
192 |
193 | /**
194 | * Append several path segments, in order, exactly as repeated calls to {@link UrlBuilder#pathSegment(String)} would.
195 | *
196 | * @param segments the path segments to append
197 | * @return this
198 | */
199 | @Nonnull
200 | public UrlBuilder pathSegments(String... segments) {
201 | for (int idx = 0; idx < segments.length; idx++) {
202 | pathSegment(segments[idx]);
203 | }
204 |
205 | return this;
206 | }
207 |
208 | /**
209 | * Add an HTML query parameter. Query parameters will be encoded in the order added.
210 | *
211 | * Using query strings to encode key=value pairs is not part of the URI/URL specification; it is specified by
212 | * http://www.w3.org/TR/html401/interact/forms.html#form-content-type.
213 | *
214 | * Once this builder has query params (added here, or parsed from a url by {@link UrlBuilder#fromUrl(URL,
215 | * CharsetDecoder)}), {@link UrlBuilder#unstructuredQuery(String)} can no longer be used.
216 | *
217 | * @param name param name
218 | * @param value param value
219 | * @return this
220 | * @throws IllegalStateException if an unstructured query has already been set on this builder
221 | */
222 | @Nonnull
223 | public UrlBuilder queryParam(@Nonnull String name, @Nonnull String value) {
224 | if (unstructuredQuery != null) {
225 | throw new IllegalStateException(
226 | "Cannot call queryParam() when this already has an unstructured query specified");
227 | }
228 |
229 | queryParams.add(Pair.of(name, value));
230 | return this;
231 | }
232 |
233 | /**
234 | * Set the complete query string of arbitrary structure. This is useful when you want to specify a query string that
235 | * is not of key=value format. If the query has previously been set via this method, subsequent calls will overwrite
236 | * that query.
237 | *
238 | * Once an unstructured query is set (here, or by {@link UrlBuilder#fromUrl(URL, CharsetDecoder)} for a query that
239 | * is not parseable into param pairs), {@link UrlBuilder#queryParam(String, String)} can no longer be used.
240 | *
241 | * @param query Complete URI query, as specified by https://tools.ietf.org/html/rfc3986#section-3.4
242 | * @return this
243 | * @throws IllegalStateException if query params have already been added to this builder
244 | */
245 | @Nonnull
246 | public UrlBuilder unstructuredQuery(@Nonnull String query) {
247 | if (!queryParams.isEmpty()) {
248 | throw new IllegalStateException(
249 | "Cannot call unstructuredQuery() when this already has queryParam pairs specified");
250 | }
251 |
252 | unstructuredQuery = query;
253 |
254 | return this;
255 | }
256 |
257 | /**
258 | * Remove all query information: both the unstructured query and every query param.
259 | *
260 | * Because a builder holds either query params or an unstructured query but never both, this method is the way to
261 | * drop whichever form is present and start over. A typical use is parsing an existing url into a builder and then
262 | * replacing its query params.
263 | *
264 | * @return this
265 | */
266 | @Nonnull
267 | public UrlBuilder clearQuery() {
268 | unstructuredQuery = null;
269 | queryParams.clear();
270 |
271 | return this;
272 | }
273 |
274 | /**
275 | * Add a matrix param to the last added path segment. If no segments have been added, an empty path segment is
276 | * created first to represent the root. Matrix params will be encoded in the order added.
277 | *
278 | * @param name param name
279 | * @param value param value
280 | * @return this
281 | */
282 | @Nonnull
283 | public UrlBuilder matrixParam(@Nonnull String name, @Nonnull String value) {
284 | if (pathSegments.isEmpty()) {
285 | // create an empty path segment to represent a matrix param applied to the root
286 | pathSegment("");
287 | }
288 |
289 | PathSegment seg = pathSegments.get(pathSegments.size() - 1);
290 | seg.matrixParams.add(Pair.of(name, value));
291 | return this;
292 | }
293 |
294 | /**
295 | * Set the fragment, replacing any fragment set earlier.
296 | *
297 | * @param fragment fragment string; it is percent-encoded when the url string is generated
298 | * @return this
299 | */
300 | @Nonnull
301 | public UrlBuilder fragment(@Nonnull String fragment) {
302 | this.fragment = fragment;
303 | return this;
304 | }
305 |
306 | /**
307 | * Force the generated URL to have a trailing slash at the end of the path. There is no method here to undo this.
308 | *
309 | * @return this
310 | */
311 | @Nonnull
312 | public UrlBuilder forceTrailingSlash() {
313 | forceTrailingSlash = true;
314 | return this;
315 | }
316 |
317 | /**
318 | * Encode the current builder state into a URL string: scheme, host, port, path, query, then fragment.
319 | *
320 | * @return a well-formed URL string
321 | * @throws CharacterCodingException if character encoding fails and the encoder is configured to report errors
322 | */
323 | public String toUrlString() throws CharacterCodingException {
324 | StringBuilder buf = new StringBuilder();
325 |
326 | buf.append(scheme);
327 | buf.append("://");
328 |
329 | buf.append(encodeHost(host));
330 | if (port != null) {
331 | buf.append(':');
332 | buf.append(port);
333 | }
334 |
335 | for (PathSegment pathSegment : pathSegments) {
336 | buf.append('/');
337 | buf.append(pathEncoder.encode(pathSegment.segment));
338 |
339 | for (Pair<String, String> matrixParam : pathSegment.matrixParams) {
340 | buf.append(';');
341 | buf.append(matrixEncoder.encode(matrixParam.getKey()));
342 | buf.append('=');
343 | buf.append(matrixEncoder.encode(matrixParam.getValue()));
344 | }
345 | }
346 |
347 | if (forceTrailingSlash) {
348 | buf.append('/');
349 | }
350 |
351 | if (!queryParams.isEmpty()) { // query params and the unstructured query are mutually exclusive
352 | buf.append("?");
353 | Iterator<Pair<String, String>> qpIter = queryParams.iterator();
354 | while (qpIter.hasNext()) {
355 | Pair<String, String> queryParam = qpIter.next();
356 | buf.append(queryParamEncoder.encode(queryParam.getKey()));
357 | buf.append('=');
358 | buf.append(queryParamEncoder.encode(queryParam.getValue()));
359 | if (qpIter.hasNext()) { // '&' only between pairs, never after the last one
360 | buf.append('&');
361 | }
362 | }
363 | } else if (unstructuredQuery != null) {
364 | buf.append("?");
365 | buf.append(unstructuredQueryEncoder.encode(unstructuredQuery));
366 | }
367 |
368 | if (fragment != null) {
369 | buf.append('#');
370 | buf.append(fragmentEncoder.encode(fragment));
371 | }
372 |
373 | return buf.toString();
374 | }
375 |
376 | /**
377 | * Populate a url builder based on the query of a url; does nothing when the url has no query.
378 | *
379 | * @param builder builder
380 | * @param decoder decoder
381 | * @param url url
382 | * @throws CharacterCodingException if percent-decoding fails and the decoder is configured to report errors
383 | */
384 | private static void buildFromQuery(UrlBuilder builder, PercentDecoder decoder, URL url) throws
385 | CharacterCodingException {
386 | if (url.getQuery() != null) {
387 | String q = url.getQuery();
388 |
389 | // try to parse into &-separated key=value pairs; one chunk that isn't exactly key=value fails the whole parse
390 | List<Pair<String, String>> pairs = new ArrayList<>();
391 | boolean parseOk = true;
392 |
393 | for (String queryChunk : q.split("&")) {
394 | String[] queryParamChunks = queryChunk.split("=");
395 |
396 | if (queryParamChunks.length != 2) {
397 | parseOk = false;
398 | break;
399 | }
400 |
401 | pairs.add(Pair.of(decoder.decode(queryParamChunks[0]),
402 | decoder.decode(queryParamChunks[1])));
403 | }
404 |
405 | if (parseOk) {
406 | for (Pair<String, String> pair : pairs) {
407 | builder.queryParam(pair.getKey(), pair.getValue());
408 | }
409 | } else {
410 | builder.unstructuredQuery(decoder.decode(q));
411 | }
412 | }
413 | }
414 |
415 | /**
416 | * Populate the path segments of a url builder from a url
417 | *
418 | * @param builder builder
419 | * @param decoder decoder
420 | * @param url url
421 | * @throws CharacterCodingException
422 | */
423 | private static void buildFromPath(UrlBuilder builder, PercentDecoder decoder, URL url) throws
424 | CharacterCodingException {
425 | for (String pathChunk : url.getPath().split("/")) {
426 | if (pathChunk.equals("")) {
427 | continue;
428 | }
429 |
430 | if (pathChunk.charAt(0) == ';') {
431 | builder.pathSegment("");
432 | // empty path segment, but matrix params
433 | for (String matrixChunk : pathChunk.substring(1).split(";")) {
434 | buildFromMatrixParamChunk(decoder, builder, matrixChunk);
435 | }
436 |
437 | continue;
438 | }
439 |
440 | // otherwise, path chunk is non empty and does not start with a ';'
441 |
442 | String[] matrixChunks = pathChunk.split(";");
443 |
444 | // first chunk is always the path segment. If there is a trailing ; and no matrix params, the ; will
445 | // not be included in the final url.
446 | builder.pathSegment(decoder.decode(matrixChunks[0]));
447 |
448 | // if there any other chunks, they're matrix param pairs
449 | for (int i = 1; i < matrixChunks.length; i++) {
450 | buildFromMatrixParamChunk(decoder, builder, matrixChunks[i]);
451 | }
452 | }
453 | }
454 |
455 | private static void buildFromMatrixParamChunk(PercentDecoder decoder, UrlBuilder ub, String pathMatrixChunk) throws
456 | CharacterCodingException {
457 | String[] mtxPair = pathMatrixChunk.split("=");
458 | if (mtxPair.length != 2) {
459 | throw new IllegalArgumentException("Malformed matrix param: <" + pathMatrixChunk + ">");
460 | }
461 |
462 | String mtxName = mtxPair[0];
463 | String mtxVal = mtxPair[1];
464 | ub.matrixParam(decoder.decode(mtxName), decoder.decode(mtxVal));
465 | }
466 |
467 | /**
468 | * @param host original host string
469 | * @return host encoded as in RFC 3986 section 3.2.2
470 | */
471 | @Nonnull
472 | private String encodeHost(String host) throws CharacterCodingException {
473 | // matching order: IP-literal, IPv4, reg-name
474 | if (IPV4_PATTERN.matcher(host).matches() || IPV6_PATTERN.matcher(host).matches()) {
475 | return host;
476 | }
477 |
478 | // it's a reg-name, which MUST be encoded as UTF-8 (regardless of the rest of the URL)
479 | return regNameEncoder.encode(host);
480 | }
481 |
482 | /**
483 | * Bundle of a path segment name and any associated matrix params.
484 | */
485 | private static class PathSegment {
486 | private final String segment;
487 | private final List> matrixParams = new ArrayList<>();
488 |
489 | PathSegment(String segment) {
490 | this.segment = segment;
491 | }
492 | }
493 |
494 | private static class Pair {
495 |
496 | private final K key;
497 | private final V value;
498 |
499 | private Pair(K key, V value) {
500 | this.key = key;
501 | this.value = value;
502 | }
503 |
504 | K getKey() {
505 | return key;
506 | }
507 |
508 | V getValue() {
509 | return value;
510 | }
511 |
512 | static Pair of(K key, V value) {
513 | return new Pair<>(key, value);
514 | }
515 | }
516 | }
517 |
--------------------------------------------------------------------------------
/src/main/java/com/palominolabs/http/url/UrlPercentEncoders.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Palomino Labs, Inc.
3 | */
4 |
5 | package com.palominolabs.http.url;
6 |
7 | import java.util.BitSet;
8 | import javax.annotation.concurrent.ThreadSafe;
9 |
10 | import static java.nio.charset.CodingErrorAction.REPLACE;
11 | import static java.nio.charset.StandardCharsets.UTF_8;
12 |
13 | /**
14 | * See RFC 3986, RFC 1738 and http://www.lunatech-research.com/archives/2009/02/03/what-every-web-developer-must-know-about-url-encoding.
15 | */
16 | @ThreadSafe
17 | public final class UrlPercentEncoders {
18 |
19 | /**
20 | * an encoder for RFC 3986 reg-names
21 | */
22 |
23 | private static final BitSet REG_NAME_BIT_SET = new BitSet();
24 |
25 | private static final BitSet PATH_BIT_SET = new BitSet();
26 | private static final BitSet MATRIX_BIT_SET = new BitSet();
27 | private static final BitSet UNSTRUCTURED_QUERY_BIT_SET = new BitSet();
28 | private static final BitSet QUERY_PARAM_BIT_SET = new BitSet();
29 | private static final BitSet FRAGMENT_BIT_SET = new BitSet();
30 |
31 | static {
32 | // RFC 3986 'reg-name'. This is not very aggressive... it's quite possible to have DNS-illegal names out of this.
33 | // Regardless, it will at least be URI-compliant even if it's not HTTP URL-compliant.
34 | addUnreserved(REG_NAME_BIT_SET);
35 | addSubdelims(REG_NAME_BIT_SET);
36 |
37 | // Represents RFC 3986 'pchar'. Remove delimiter that starts matrix section.
38 | addPChar(PATH_BIT_SET);
39 | PATH_BIT_SET.clear((int) ';');
40 |
41 | // Remove delims for HTTP matrix params as per RFC 1738 S3.3. The other reserved chars ('/' and '?') are already excluded.
42 | addPChar(MATRIX_BIT_SET);
43 | MATRIX_BIT_SET.clear((int) ';');
44 | MATRIX_BIT_SET.clear((int) '=');
45 |
46 | /*
47 | * At this point it represents RFC 3986 'query'. http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1 also
48 | * specifies that "+" can mean space in a query, so we will make sure to say that '+' is not safe to leave as-is
49 | */
50 | addQuery(UNSTRUCTURED_QUERY_BIT_SET);
51 | UNSTRUCTURED_QUERY_BIT_SET.clear((int) '+');
52 |
53 | /*
54 | * Create more stringent requirements for HTML4 queries: remove delimiters for HTML query params so that key=value
55 | * pairs can be used.
56 | */
57 | QUERY_PARAM_BIT_SET.or(UNSTRUCTURED_QUERY_BIT_SET);
58 | QUERY_PARAM_BIT_SET.clear((int) '=');
59 | QUERY_PARAM_BIT_SET.clear((int) '&');
60 |
61 | addFragment(FRAGMENT_BIT_SET);
62 | }
63 |
64 | /**
65 | * @return a PercentEncoder for RFC 3986 'reg-name' characters
66 | */
67 | public static PercentEncoder getRegNameEncoder() {
68 | return new PercentEncoder(REG_NAME_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE)
69 | .onUnmappableCharacter(REPLACE));
70 | }
71 |
72 | /**
73 | * @return a PercentEncoder for RFC 3986 'pchar'
74 | */
75 | public static PercentEncoder getPathEncoder() {
76 | return new PercentEncoder(PATH_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE)
77 | .onUnmappableCharacter(REPLACE));
78 | }
79 |
80 | /**
81 | * @return a PercentEncoder for RFC 1738 S3.3 matrix params
82 | */
83 | public static PercentEncoder getMatrixEncoder() {
84 | return new PercentEncoder(MATRIX_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE)
85 | .onUnmappableCharacter(REPLACE));
86 | }
87 |
88 | /**
89 | * @return a PercentEncoder for RFC 3986 'query''
90 | */
91 | public static PercentEncoder getUnstructuredQueryEncoder() {
92 | return new PercentEncoder(UNSTRUCTURED_QUERY_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE)
93 | .onUnmappableCharacter(REPLACE));
94 | }
95 |
96 | /**
97 | * @return a PercentEncoder for HTML queries
98 | */
99 | public static PercentEncoder getQueryParamEncoder() {
100 | return new PercentEncoder(QUERY_PARAM_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE)
101 | .onUnmappableCharacter(REPLACE));
102 | }
103 |
104 | /**
105 | * @return a PercentEncoder for fragments
106 | */
107 | public static PercentEncoder getFragmentEncoder() {
108 | return new PercentEncoder(FRAGMENT_BIT_SET, UTF_8.newEncoder().onMalformedInput(REPLACE)
109 | .onUnmappableCharacter(REPLACE));
110 | }
111 |
112 | private UrlPercentEncoders() {
113 | }
114 |
115 | /**
116 | * Add code points for 'fragment' chars
117 | *
118 | * @param fragmentBitSet bit set
119 | */
120 | private static void addFragment(BitSet fragmentBitSet) {
121 | addPChar(fragmentBitSet);
122 | fragmentBitSet.set((int) '/');
123 | fragmentBitSet.set((int) '?');
124 | }
125 |
126 | /**
127 | * Add code points for 'query' chars
128 | *
129 | * @param queryBitSet bit set
130 | */
131 | private static void addQuery(BitSet queryBitSet) {
132 | addPChar(queryBitSet);
133 | queryBitSet.set((int) '/');
134 | queryBitSet.set((int) '?');
135 | }
136 |
137 | /**
138 | * Add code points for 'pchar' chars.
139 | *
140 | * @param bs bitset
141 | */
142 | private static void addPChar(BitSet bs) {
143 | addUnreserved(bs);
144 | addSubdelims(bs);
145 | bs.set((int) ':');
146 | bs.set((int) '@');
147 | }
148 |
149 | /**
150 | * Add codepoints for 'unreserved' chars
151 | *
152 | * @param bs bitset to add codepoints to
153 | */
154 | private static void addUnreserved(BitSet bs) {
155 |
156 | for (int i = 'a'; i <= 'z'; i++) {
157 | bs.set(i);
158 | }
159 | for (int i = 'A'; i <= 'Z'; i++) {
160 | bs.set(i);
161 | }
162 | for (int i = '0'; i <= '9'; i++) {
163 | bs.set(i);
164 | }
165 | bs.set((int) '-');
166 | bs.set((int) '.');
167 | bs.set((int) '_');
168 | bs.set((int) '~');
169 | }
170 |
171 | /**
172 | * Add codepoints for 'sub-delims' chars
173 | *
174 | * @param bs bitset to add codepoints to
175 | */
176 | private static void addSubdelims(BitSet bs) {
177 | bs.set((int) '!');
178 | bs.set((int) '$');
179 | bs.set((int) '&');
180 | bs.set((int) '\'');
181 | bs.set((int) '(');
182 | bs.set((int) ')');
183 | bs.set((int) '*');
184 | bs.set((int) '+');
185 | bs.set((int) ',');
186 | bs.set((int) ';');
187 | bs.set((int) '=');
188 | }
189 | }
190 |
--------------------------------------------------------------------------------
/src/test/java/com/palominolabs/http/url/PercentEncoderTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Palomino Labs, Inc.
3 | */
4 |
5 | package com.palominolabs.http.url;
6 |
7 | import java.nio.charset.CharacterCodingException;
8 | import java.nio.charset.MalformedInputException;
9 | import java.nio.charset.UnmappableCharacterException;
10 | import java.util.BitSet;
11 | import org.junit.jupiter.api.BeforeEach;
12 | import org.junit.jupiter.api.Test;
13 |
14 | import static java.nio.charset.CodingErrorAction.REPLACE;
15 | import static java.nio.charset.StandardCharsets.UTF_16BE;
16 | import static java.nio.charset.StandardCharsets.UTF_8;
17 | import static org.junit.jupiter.api.Assertions.assertEquals;
18 |
19 | public final class PercentEncoderTest {
20 |
21 | private PercentEncoder alnum;
22 | private PercentEncoder alnum16;
23 |
24 | @BeforeEach
25 | public void setUp() {
26 | BitSet bs = new BitSet();
27 | for (int i = 'a'; i <= 'z'; i++) {
28 | bs.set(i);
29 | }
30 | for (int i = 'A'; i <= 'Z'; i++) {
31 | bs.set(i);
32 | }
33 | for (int i = '0'; i <= '9'; i++) {
34 | bs.set(i);
35 | }
36 |
37 | this.alnum = new PercentEncoder(bs, UTF_8.newEncoder().onMalformedInput(REPLACE)
38 | .onUnmappableCharacter(REPLACE));
39 | this.alnum16 = new PercentEncoder(bs, UTF_16BE.newEncoder().onMalformedInput(REPLACE)
40 | .onUnmappableCharacter(REPLACE));
41 | }
42 |
43 | @Test
44 | public void testDoesntEncodeSafe() throws CharacterCodingException {
45 | BitSet set = new BitSet();
46 | for (int i = 'a'; i <= 'z'; i++) {
47 | set.set(i);
48 | }
49 |
50 | PercentEncoder pe = new PercentEncoder(set, UTF_8.newEncoder().onMalformedInput(REPLACE)
51 | .onUnmappableCharacter(REPLACE));
52 | assertEquals("abcd%41%42%43%44", pe.encode("abcdABCD"));
53 | }
54 |
55 | @Test
56 | public void testEncodeInBetweenSafe() throws MalformedInputException, UnmappableCharacterException {
57 | assertEquals("abc%20123", alnum.encode("abc 123"));
58 | }
59 |
60 | @Test
61 | public void testSafeInBetweenEncoded() throws MalformedInputException, UnmappableCharacterException {
62 | assertEquals("%20abc%20", alnum.encode(" abc "));
63 | }
64 |
65 | @Test
66 | public void testEncodeUtf8() throws CharacterCodingException {
67 | // 1 UTF-16 char (unicode snowman)
68 | assertEquals("snowman%E2%98%83", alnum.encode("snowman\u2603"));
69 | }
70 |
71 | @Test
72 | public void testEncodeUtf8SurrogatePair() throws CharacterCodingException {
73 | // musical G clef: 1d11e, has to be represented in surrogate pair form
74 | assertEquals("clef%F0%9D%84%9E", alnum.encode("clef\ud834\udd1e"));
75 | }
76 |
77 | @Test
78 | public void testEncodeUtf16() throws CharacterCodingException {
79 | // 1 UTF-16 char (unicode snowman)
80 | assertEquals("snowman%26%03", alnum16.encode("snowman\u2603"));
81 | }
82 |
83 | @Test
84 | public void testUrlEncodedUtf16SurrogatePair() throws CharacterCodingException {
85 | // musical G clef: 1d11e, has to be represented in surrogate pair form
86 | assertEquals("clef%D8%34%DD%1E", alnum16.encode("clef\ud834\udd1e"));
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/src/test/java/com/palominolabs/http/url/UrlBuilderTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2012 Palomino Labs, Inc.
3 | */
4 |
5 | package com.palominolabs.http.url;
6 |
7 | import java.net.MalformedURLException;
8 | import java.net.URI;
9 | import java.net.URISyntaxException;
10 | import java.net.URL;
11 | import java.nio.charset.CharacterCodingException;
12 | import org.junit.jupiter.api.Test;
13 |
14 | import static com.palominolabs.http.url.UrlBuilder.forHost;
15 | import static com.palominolabs.http.url.UrlBuilder.fromUrl;
16 | import static org.junit.jupiter.api.Assertions.assertEquals;
17 | import static org.junit.jupiter.api.Assertions.fail;
18 |
19 | public final class UrlBuilderTest {
20 |
21 | @Test
22 | public void testNoUrlParts() throws CharacterCodingException {
23 | assertUrlEquals("http://foo.com", forHost("http", "foo.com").toUrlString());
24 | }
25 |
26 | @Test
27 | public void testWithPort() throws CharacterCodingException {
28 | assertUrlEquals("http://foo.com:33", forHost("http", "foo.com", 33).toUrlString());
29 | }
30 |
31 | @Test
32 | public void testSimplePath() throws CharacterCodingException {
33 | UrlBuilder ub = forHost("http", "foo.com");
34 | ub.pathSegment("seg1").pathSegment("seg2");
35 | assertUrlEquals("http://foo.com/seg1/seg2", ub.toUrlString());
36 | }
37 |
38 | @Test
39 | public void testPathWithReserved() throws CharacterCodingException {
40 | // RFC 1738 S3.3
41 | UrlBuilder ub = forHost("http", "foo.com");
42 | ub.pathSegment("seg/;?ment").pathSegment("seg=&2");
43 | assertUrlEquals("http://foo.com/seg%2F%3B%3Fment/seg=&2", ub.toUrlString());
44 | }
45 |
46 | @Test
47 | public void testPathSegments() throws CharacterCodingException {
48 | UrlBuilder ub = forHost("http", "foo.com");
49 | ub.pathSegments("seg1", "seg2", "seg3");
50 | assertUrlEquals("http://foo.com/seg1/seg2/seg3", ub.toUrlString());
51 | }
52 |
53 | @Test
54 | public void testMatrixWithoutPathHasLeadingSlash() throws CharacterCodingException {
55 | UrlBuilder ub = forHost("http", "foo.com");
56 | ub.matrixParam("foo", "bar");
57 | assertUrlEquals("http://foo.com/;foo=bar", ub.toUrlString());
58 | }
59 |
60 | @Test
61 | public void testMatrixWithReserved() throws CharacterCodingException {
62 | UrlBuilder ub = forHost("http", "foo.com")
63 | .pathSegment("foo")
64 | .matrixParam("foo", "bar")
65 | .matrixParam("res;=?#/erved", "value")
66 | .pathSegment("baz");
67 | assertUrlEquals("http://foo.com/foo;foo=bar;res%3B%3D%3F%23%2Ferved=value/baz", ub.toUrlString());
68 | }
69 |
70 | @Test
71 | public void testUrlEncodedPathSegmentUtf8() throws CharacterCodingException {
72 | // 1 UTF-16 char
73 | UrlBuilder ub = forHost("http", "foo.com");
74 | ub.pathSegment("snowman").pathSegment("\u2603");
75 | assertUrlEquals("http://foo.com/snowman/%E2%98%83", ub.toUrlString());
76 | }
77 |
78 | @Test
79 | public void testUrlEncodedPathSegmentUtf8SurrogatePair() throws CharacterCodingException {
80 | UrlBuilder ub = forHost("http", "foo.com");
81 | // musical G clef: 1d11e, has to be represented in surrogate pair form
82 | ub.pathSegment("clef").pathSegment("\ud834\udd1e");
83 | assertUrlEquals("http://foo.com/clef/%F0%9D%84%9E", ub.toUrlString());
84 | }
85 |
86 | @Test
87 | public void testQueryParamNoPath() throws CharacterCodingException {
88 | UrlBuilder ub = forHost("http", "foo.com");
89 | ub.queryParam("foo", "bar");
90 | String s = ub.toUrlString();
91 | assertUrlEquals("http://foo.com?foo=bar", s);
92 | }
93 |
94 | @Test
95 | public void testQueryParamsDuplicated() throws CharacterCodingException {
96 | UrlBuilder ub = forHost("http", "foo.com");
97 | ub.queryParam("foo", "bar");
98 | ub.queryParam("foo", "bar2");
99 | ub.queryParam("baz", "quux");
100 | ub.queryParam("baz", "quux2");
101 | assertUrlEquals("http://foo.com?foo=bar&foo=bar2&baz=quux&baz=quux2", ub.toUrlString());
102 | }
103 |
104 | @Test
105 | public void testEncodeQueryParams() throws CharacterCodingException {
106 | UrlBuilder ub = forHost("http", "foo.com");
107 | ub.queryParam("foo", "bar&=#baz");
108 | ub.queryParam("foo", "bar?/2");
109 | assertUrlEquals("http://foo.com?foo=bar%26%3D%23baz&foo=bar?/2", ub.toUrlString());
110 | }
111 |
112 | @Test
113 | public void testEncodeQueryParamWithSpaceAndPlus() throws CharacterCodingException {
114 | UrlBuilder ub = forHost("http", "foo.com");
115 | ub.queryParam("foo", "spa ce");
116 | ub.queryParam("fo+o", "plus+");
117 | assertUrlEquals("http://foo.com?foo=spa%20ce&fo%2Bo=plus%2B", ub.toUrlString());
118 | }
119 |
120 | @Test
121 | public void testPlusInVariousParts() throws CharacterCodingException {
122 | UrlBuilder ub = forHost("http", "foo.com");
123 |
124 | ub.pathSegment("has+plus")
125 | .matrixParam("plusMtx", "pl+us")
126 | .queryParam("plusQp", "pl+us")
127 | .fragment("plus+frag");
128 |
129 | assertUrlEquals("http://foo.com/has+plus;plusMtx=pl+us?plusQp=pl%2Bus#plus+frag", ub.toUrlString());
130 | }
131 |
132 | @Test
133 | public void testFragment() throws CharacterCodingException {
134 | UrlBuilder ub = forHost("http", "foo.com");
135 | ub.queryParam("foo", "bar");
136 | ub.fragment("#frag/?");
137 | assertUrlEquals("http://foo.com?foo=bar#%23frag/?", ub.toUrlString());
138 | }
139 |
140 | @Test
141 | public void testAllParts() throws CharacterCodingException {
142 | UrlBuilder ub = forHost("https", "foo.bar.com", 3333);
143 | ub.pathSegment("foo");
144 | ub.pathSegment("bar");
145 | ub.matrixParam("mtx1", "val1");
146 | ub.matrixParam("mtx2", "val2");
147 | ub.queryParam("q1", "v1");
148 | ub.queryParam("q2", "v2");
149 | ub.fragment("zomg it's a fragment");
150 |
151 | assertEquals("https://foo.bar.com:3333/foo/bar;mtx1=val1;mtx2=val2?q1=v1&q2=v2#zomg%20it's%20a%20fragment",
152 | ub.toUrlString());
153 | }
154 |
155 | @Test
156 | public void testIPv4Literal() throws CharacterCodingException {
157 | UrlBuilder ub = forHost("http", "127.0.0.1");
158 | assertUrlEquals("http://127.0.0.1", ub.toUrlString());
159 | }
160 |
161 | @Test
162 | public void testBadIPv4LiteralDoesntChoke() throws CharacterCodingException {
163 | UrlBuilder ub = forHost("http", "300.100.50.1");
164 | assertUrlEquals("http://300.100.50.1", ub.toUrlString());
165 | }
166 |
167 | @Test
168 | public void testIPv6LiteralLocalhost() throws CharacterCodingException {
169 | UrlBuilder ub = forHost("http", "[::1]");
170 | assertUrlEquals("http://[::1]", ub.toUrlString());
171 | }
172 |
173 | @Test
174 | public void testIPv6Literal() throws CharacterCodingException {
175 | UrlBuilder ub = forHost("http", "[2001:db8:85a3::8a2e:370:7334]");
176 | assertUrlEquals("http://[2001:db8:85a3::8a2e:370:7334]", ub.toUrlString());
177 | }
178 |
179 | @Test
180 | public void testEncodedRegNameSingleByte() throws CharacterCodingException {
181 | UrlBuilder ub = forHost("http", "host?name;");
182 | assertUrlEquals("http://host%3Fname;", ub.toUrlString());
183 | }
184 |
185 | @Test
186 | public void testEncodedRegNameMultiByte() throws CharacterCodingException {
187 | UrlBuilder ub = forHost("http", "snow\u2603man");
188 | assertUrlEquals("http://snow%E2%98%83man", ub.toUrlString());
189 | }
190 |
191 | @Test
192 | public void testForceTrailingSlash() throws CharacterCodingException {
193 | UrlBuilder ub = forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c");
194 |
195 | assertUrlEquals("https://foo.com/a/b/c/", ub.toUrlString());
196 | }
197 |
198 | @Test
199 | public void testForceTrailingSlashWithQueryParams() throws CharacterCodingException {
200 | UrlBuilder ub =
201 | forHost("https", "foo.com").forceTrailingSlash().pathSegments("a", "b", "c").queryParam("foo", "bar");
202 |
203 | assertUrlEquals("https://foo.com/a/b/c/?foo=bar", ub.toUrlString());
204 | }
205 |
206 | @Test
207 | public void testForceTrailingSlashNoPathSegmentsWithMatrixParams() throws CharacterCodingException {
208 | UrlBuilder ub = forHost("https", "foo.com").forceTrailingSlash().matrixParam("m1", "v1");
209 |
210 | assertUrlEquals("https://foo.com/;m1=v1/", ub.toUrlString());
211 | }
212 |
213 | @Test
214 | public void testIntermingledMatrixParamsAndPathSegments() throws CharacterCodingException {
215 |
216 | UrlBuilder ub = forHost("http", "foo.com")
217 | .pathSegments("seg1", "seg2")
218 | .matrixParam("m1", "v1")
219 | .pathSegment("seg3")
220 | .matrixParam("m2", "v2");
221 |
222 | assertUrlEquals("http://foo.com/seg1/seg2;m1=v1/seg3;m2=v2", ub.toUrlString());
223 | }
224 |
225 | @Test
226 | public void testFromUrlWithEverything() {
227 | String orig =
228 | "https://foo.bar.com:3333/foo/ba%20r;mtx1=val1;mtx2=val%202/seg%203;m2=v2?q1=v1&q2=v%202#zomg%20it's%20a%20fragment";
229 | assertUrlBuilderRoundtrip(orig);
230 | }
231 |
232 | @Test
233 | public void testFromUrlWithEmptyPath() {
234 | assertUrlBuilderRoundtrip("http://foo.com");
235 | }
236 |
237 | @Test
238 | public void testFromUrlWithEmptyPathAndSlash() {
239 | assertUrlBuilderRoundtrip("http://foo.com/", "http://foo.com");
240 | }
241 |
242 | @Test
243 | public void testFromUrlWithPort() {
244 | assertUrlBuilderRoundtrip("http://foo.com:1234");
245 | }
246 |
247 | @Test
248 | public void testFromUrlWithEmptyPathSegent() {
249 | assertUrlBuilderRoundtrip("http://foo.com/foo//", "http://foo.com/foo");
250 | }
251 |
252 | @Test
253 | public void testFromUrlWithEncodedHost() {
254 | assertUrlBuilderRoundtrip("http://f%20oo.com/bar");
255 | }
256 |
257 | @Test
258 | public void testFromUrlWithEncodedPathSegment() {
259 | assertUrlBuilderRoundtrip("http://foo.com/foo/b%20ar");
260 | }
261 |
262 | @Test
263 | public void testFromUrlWithEncodedMatrixParam() {
264 | assertUrlBuilderRoundtrip("http://foo.com/foo;m1=v1;m%202=v%202");
265 | }
266 |
267 | @Test
268 | public void testFromUrlWithEncodedQueryParam() {
269 | assertUrlBuilderRoundtrip("http://foo.com/foo?q%201=v%202&q2=v2");
270 | }
271 |
272 | @Test
273 | public void testFromUrlWithEncodedQueryParamDelimiter() {
274 | assertUrlBuilderRoundtrip("http://foo.com/foo?q1=%3Dv1&%26q2=v2");
275 | }
276 |
277 | @Test
278 | public void testFromUrlWithEncodedFragment() {
279 | assertUrlBuilderRoundtrip("http://foo.com/foo#b%20ar");
280 | }
281 |
282 | @Test
283 | public void testFromUrlWithMalformedMatrixPair() throws MalformedURLException, CharacterCodingException {
284 | try {
285 | fromUrl(new URL("http://foo.com/foo;m1=v1=v2"));
286 | fail();
287 | } catch (IllegalArgumentException e) {
288 | assertEquals("Malformed matrix param: ", e.getMessage());
289 | }
290 | }
291 |
292 | @Test
293 | public void testFromUrlWithEmptyPathSegmentWithMatrixParams() {
294 | assertUrlBuilderRoundtrip("http://foo.com/foo/;m1=v1");
295 | }
296 |
297 | @Test
298 | public void testFromUrlWithEmptyPathWithMatrixParams() {
299 | assertUrlBuilderRoundtrip("http://foo.com/;m1=v1");
300 | }
301 |
302 | @Test
303 | public void testFromUrlWithEmptyPathWithMultipleMatrixParams() {
304 | assertUrlBuilderRoundtrip("http://foo.com/;m1=v1;m2=v2");
305 | }
306 |
307 | @Test
308 | public void testFromUrlWithPathSegmentEndingWithSemicolon() {
309 | assertUrlBuilderRoundtrip("http://foo.com/foo;", "http://foo.com/foo");
310 | }
311 |
312 | @Test
313 | public void testPercentDecodeInvalidPair() throws MalformedURLException, CharacterCodingException {
314 | try {
315 | fromUrl(new URL("http://foo.com/fo%2o"));
316 | fail();
317 | } catch (IllegalArgumentException e) {
318 | assertEquals("Invalid %-tuple <%2o>", e.getMessage());
319 | }
320 | }
321 |
322 | @Test
323 | public void testFromUrlMalformedQueryParamMultiValues() {
324 | assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1=v2");
325 | }
326 |
327 | @Test
328 | public void testFromUrlMalformedQueryParamNoValue() {
329 | assertUrlBuilderRoundtrip("http://foo.com/foo?q1=v1&q2");
330 | }
331 |
332 | @Test
333 | public void testFromUrlUnstructuredQueryWithEscapedChars() {
334 | assertUrlBuilderRoundtrip("http://foo.com/foo?query==&%23");
335 | }
336 |
337 | @Test
338 | public void testCantUseQueryParamAfterQuery() {
339 | UrlBuilder ub = forHost("http", "foo.com").unstructuredQuery("q");
340 |
341 | try {
342 | ub.queryParam("foo", "bar");
343 | fail();
344 | } catch (IllegalStateException e) {
345 | assertEquals("Cannot call queryParam() when this already has an unstructured query specified",
346 | e.getMessage());
347 | }
348 | }
349 |
350 | @Test
351 | public void testCantUseQueryAfterQueryParam() {
352 | UrlBuilder ub = forHost("http", "foo.com").queryParam("foo", "bar");
353 |
354 | try {
355 | ub.unstructuredQuery("q");
356 |
357 | fail();
358 | } catch (IllegalStateException e) {
359 | assertEquals("Cannot call unstructuredQuery() when this already has queryParam pairs specified",
360 | e.getMessage());
361 | }
362 | }
363 |
364 | @Test
365 | public void testUnstructuredQueryWithNoSpecialChars() throws CharacterCodingException {
366 | assertUrlEquals("http://foo.com?q", forHost("http", "foo.com").unstructuredQuery("q").toUrlString());
367 | }
368 |
369 | @Test
370 | public void testUnstructuredQueryWithOkSpecialChars() throws CharacterCodingException {
371 | assertUrlEquals("http://foo.com?q?/&=", forHost("http", "foo.com").unstructuredQuery("q?/&=").toUrlString());
372 | }
373 |
374 | @Test
375 | public void testUnstructuredQueryWithEscapedSpecialChars() throws CharacterCodingException {
376 | assertUrlEquals("http://foo.com?q%23%2B", forHost("http", "foo.com").unstructuredQuery("q#+").toUrlString());
377 | }
378 |
379 | @Test
380 | public void testClearQueryRemovesQueryParam() throws CharacterCodingException {
381 | UrlBuilder ub = forHost("http", "host")
382 | .queryParam("foo", "bar")
383 | .clearQuery();
384 | assertUrlEquals("http://host", ub.toUrlString());
385 | }
386 |
387 | @Test
388 | public void testClearQueryRemovesUnstructuredQuery() throws CharacterCodingException {
389 | UrlBuilder ub = forHost("http", "host")
390 | .unstructuredQuery("foobar")
391 | .clearQuery();
392 | assertUrlEquals("http://host", ub.toUrlString());
393 | }
394 |
395 | @Test
396 | public void testClearQueryAfterQueryParamAllowsQuery() throws CharacterCodingException {
397 | UrlBuilder ub = forHost("http", "host")
398 | .queryParam("foo", "bar")
399 | .clearQuery()
400 | .unstructuredQuery("foobar");
401 | assertUrlEquals("http://host?foobar", ub.toUrlString());
402 | }
403 |
404 | @Test
405 | public void testClearQueryAfterQueryAllowsQueryParam() throws CharacterCodingException {
406 | UrlBuilder ub = forHost("http", "host")
407 | .unstructuredQuery("foobar")
408 | .clearQuery()
409 | .queryParam("foo", "bar");
410 | assertUrlEquals("http://host?foo=bar", ub.toUrlString());
411 | }
412 |
413 | private void assertUrlBuilderRoundtrip(String url) {
414 | assertUrlBuilderRoundtrip(url, url);
415 | }
416 |
417 | /**
418 | * @param origUrl the url that will be used to create a URL
419 | * @param finalUrl the URL string it should end up as
420 | */
421 | private void assertUrlBuilderRoundtrip(String origUrl, String finalUrl) {
422 | try {
423 | assertUrlEquals(finalUrl, fromUrl(new URL(origUrl)).toUrlString());
424 | } catch (CharacterCodingException | MalformedURLException e) {
425 | throw new RuntimeException(e);
426 | }
427 | }
428 |
429 | private static void assertUrlEquals(String expected, String actual) {
430 | assertEquals(expected, actual);
431 | try {
432 | assertEquals(expected, new URI(actual).toString());
433 | } catch (URISyntaxException e) {
434 | throw new RuntimeException(e);
435 | }
436 | try {
437 | assertEquals(expected, new URL(actual).toString());
438 | } catch (MalformedURLException e) {
439 | throw new RuntimeException(e);
440 | }
441 | }
442 | }
443 |
--------------------------------------------------------------------------------
/src/test/kotlin/com/palominolabs/http/url/PercentDecoderTest.kt:
--------------------------------------------------------------------------------
1 | package com.palominolabs.http.url
2 |
3 | import org.junit.jupiter.api.BeforeEach
4 | import org.junit.jupiter.api.Test
5 | import org.junit.jupiter.api.assertThrows
6 | import java.lang.Character.isHighSurrogate
7 | import java.lang.Character.isLowSurrogate
8 | import java.nio.charset.StandardCharsets.UTF_8
9 | import java.util.Random
10 | import kotlin.streams.asSequence
11 | import kotlin.test.assertEquals
12 | import kotlin.test.fail
13 |
/**
 * Tests for [PercentDecoder] with a UTF-8 decoder: fixed inputs, malformed %-sequences, and an
 * encode/decode round trip over random strings.
 */
class PercentDecoderTest {
    private lateinit var decoder: PercentDecoder

    @BeforeEach
    fun setUp() {
        decoder = PercentDecoder(UTF_8.newDecoder())
    }

    @Test
    fun testDecodesWithoutPercents() {
        assertEquals("asdf", decoder.decode("asdf"))
    }

    @Test
    fun testDecodeSingleByte() {
        assertEquals("#", decoder.decode("%23"))
    }

    @Test
    fun testIncompletePercentPairNoNumbers() {
        val e = assertThrows<IllegalArgumentException> { decoder.decode("%") }
        assertEquals("Could not percent decode <%>: incomplete %-pair at position 0", e.message)
    }

    @Test
    fun testIncompletePercentPairOneNumber() {
        val e = assertThrows<IllegalArgumentException> { decoder.decode("%2") }
        assertEquals("Could not percent decode <%2>: incomplete %-pair at position 0", e.message)
    }

    @Test
    fun testInvalidHex() {
        val e = assertThrows<IllegalArgumentException> { decoder.decode("%xz") }
        assertEquals("Invalid %-tuple <%xz>", e.message)
    }

    @Test
    fun testRandomStrings() {
        val encoder = UrlPercentEncoders.getUnstructuredQueryEncoder()
        val rand = Random()

        // re-seed with a known value so a failure can be reproduced from the reported seed
        val seed = rand.nextLong()
        rand.setSeed(seed)

        val charBuf = CharArray(2)
        val codePoints = mutableListOf<Int>()
        val buf = StringBuilder()

        repeat(10_000) {
            buf.setLength(0)
            codePoints.clear()

            randString(buf, codePoints, charBuf, rand, 1 + rand.nextInt(1000))

            val original = buf.toString()
            val origBytes = original.encodeToByteArray()
            val codePointsHex = codePoints.map { i -> Integer.toHexString(i) }

            val decodedBytes =
                try {
                    decoder.decode(encoder.encode(original)).encodeToByteArray()
                } catch (e: IllegalArgumentException) {
                    val charHex = original
                        .chars()
                        .asSequence()
                        .map { Integer.toHexString(it) }
                        .toList()
                    // braces are required: "$e.message" would print e.toString() followed by ".message"
                    fail("seed: $seed code points: $codePointsHex chars $charHex ${e.message}")
                }

            assertEquals(toHex(origBytes), toHex(decodedBytes), "Seed: $seed Code points: $codePointsHex")
        }
    }

    /**
     * Append random defined, non-surrogate code points to [buf] until it holds at least [length] chars.
     *
     * Because a supplementary code point adds two chars at once, the result may be one char longer than
     * [length].
     *
     * @param buf buffer to write into
     * @param codePoints list that receives every code point appended to [buf]
     * @param charBuf char buf for temporary char wrangling (size 2)
     * @param rand random source
     * @param length number of chars at which generation stops
     */
    private fun randString(
        buf: StringBuilder,
        codePoints: MutableList<Int>,
        charBuf: CharArray,
        rand: Random,
        length: Int
    ) {
        while (buf.length < length) {
            // pick something in the range of all 17 unicode planes
            val codePoint = rand.nextInt(17 * 65536)
            if (Character.isDefined(codePoint)) {
                val res = Character.toChars(codePoint, charBuf, 0)

                if (res == CODE_POINT_IN_BMP && (isHighSurrogate(charBuf[0]) || isLowSurrogate(charBuf[0]))) {
                    // isDefined is true even if it's a standalone surrogate in the D800-DFFF range, but those
                    // are not legal single unicode code units (that is, a single char)
                    continue
                }

                // whether it's a pair or not, we want the only char (or high surrogate)
                buf.append(charBuf[0])
                codePoints.add(codePoint)
                if (res == CODE_POINT_IN_SUPPLEMENTARY) {
                    // it's a surrogate pair, so we care about the second char
                    buf.append(charBuf[1])
                }
            }
        }
    }
}
125 |
/**
 * Render each byte as the lowercase hex string of its unsigned value, without zero padding.
 *
 * @param bytes bytes to convert
 * @return list of hex strings, one per input byte, in input order
 */
private fun toHex(bytes: ByteArray): List<String> = bytes.map { Integer.toHexString(it.toInt().and(0xFF)) }
131 |
// Compared against the return value of Character.toChars(codePoint, dst, dstIndex), i.e. the number of chars
// written: 2 when the code point needed a surrogate pair, 1 when it fit in a single char.
private const val CODE_POINT_IN_SUPPLEMENTARY = 2
private const val CODE_POINT_IN_BMP = 1
134 |
--------------------------------------------------------------------------------