├── urllib ├── src │ ├── main │ │ ├── java │ │ │ └── org │ │ │ │ └── urllib │ │ │ │ ├── internal │ │ │ │ ├── authority │ │ │ │ │ ├── InvalidHostException.java │ │ │ │ │ ├── Port.java │ │ │ │ │ ├── Dns.java │ │ │ │ │ ├── Hosts.java │ │ │ │ │ ├── Ip4.java │ │ │ │ │ ├── Authority.java │ │ │ │ │ └── Ip6.java │ │ │ │ ├── StandardCharsets.java │ │ │ │ ├── Hex.java │ │ │ │ ├── Scheme.java │ │ │ │ ├── Type.java │ │ │ │ ├── PercentDecoder.java │ │ │ │ ├── EncodeRules.java │ │ │ │ ├── Strings.java │ │ │ │ ├── Queries.java │ │ │ │ ├── CodepointMatcher.java │ │ │ │ ├── SplitUrl.java │ │ │ │ ├── Paths.java │ │ │ │ └── PercentEncoder.java │ │ │ │ ├── Query.java │ │ │ │ ├── Host.java │ │ │ │ ├── Url.java │ │ │ │ ├── Path.java │ │ │ │ ├── UrlBuilder.java │ │ │ │ └── Urls.java │ │ └── resources │ │ │ └── encode-set.tsv │ └── test │ │ └── java │ │ └── org │ │ └── urllib │ │ ├── internal │ │ ├── authority │ │ │ ├── DnsTest.java │ │ │ ├── HostsTest.java │ │ │ ├── Ip4Test.java │ │ │ ├── AuthorityTest.java │ │ │ └── Ip6Test.java │ │ ├── StringsTest.java │ │ ├── PercentDecoderTest.java │ │ ├── SplitUrlTest.java │ │ ├── PathsTest.java │ │ └── PercentEncoderTest.java │ │ ├── QueryTest.java │ │ ├── TestEncodingRules.java │ │ └── UrlsTest.java └── pom.xml ├── .gitignore ├── urllibfuzz ├── pom.xml └── src │ └── test │ └── java │ └── org │ └── urllib │ └── UrlsFuzzTest.java ├── README.md ├── pom.xml └── LICENSE /urllib/src/main/java/org/urllib/internal/authority/InvalidHostException.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | class InvalidHostException extends IllegalArgumentException { 4 | 5 | InvalidHostException(String hostname, int i) { 6 | super(String.format("Invalid hostname: Illegal character at %d in %s.", i, hostname)); 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/StandardCharsets.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import java.nio.charset.Charset; 4 | 5 | public final class StandardCharsets { 6 | 7 | private StandardCharsets() {} 8 | 9 | public static final Charset US_ASCII = Charset.forName("US-ASCII"); 10 | public static final Charset UTF_8 = Charset.forName("UTF-8"); 11 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.ear 17 | *.zip 18 | *.tar.gz 19 | *.rar 20 | 21 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 22 | hs_err_pid* 23 | 24 | .classpath 25 | .project 26 | .settings/ 27 | target/ 28 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/Query.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | import javax.annotation.Nonnull; 6 | import javax.annotation.Nullable; 7 | 8 | /** 9 | * Represents a Url's query. 10 | * 11 | * @see RFC 3986#3.4 12 | */ 13 | public interface Query { 14 | 15 | List params(); 16 | Map asMap(); 17 | boolean isEmpty(); 18 | String encoded(); 19 | 20 | interface KeyValue { 21 | @Nonnull String key(); 22 | @Nullable String value(); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/Hex.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | public final class Hex { 4 | 5 | static int decodeHex(int b1, int b2) { 6 | int i1 = decodeHex(b1); 7 | int i2 = decodeHex(b2); 8 | return i1 > -1 && i2 > -1 9 | ? i1 << 4 | i2 10 | : -1; 11 | } 12 | 13 | private static int decodeHex(int b) { 14 | if (b >= '0' && b <= '9') { 15 | return b - '0'; 16 | } else if (b >= 'A' && b <= 'F') { 17 | return b - 'A' + 0xA; 18 | } else if (b >= 'a' && b <= 'f') { 19 | return b - 'a' + 0xA; 20 | } else { 21 | return -1; 22 | } 23 | } 24 | 25 | public static boolean isHex(int i) { 26 | return decodeHex(i) > -1; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/Scheme.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import java.util.Locale; 5 | 6 | @AutoValue 7 | public abstract class Scheme { 8 | 9 | public static final Scheme HTTP = create("http", 80); 10 | public static final Scheme HTTPS = create("https", 443); 11 | 12 | public abstract String name(); 13 | public abstract int defaultPort(); 14 | 15 | public static Scheme create(String name, int port) {return new AutoValue_Scheme(name, port);} 16 | public static Scheme valueOf(String scheme) { 17 | switch (scheme.toLowerCase(Locale.US)) { 18 | case "http": 19 | return HTTP; 20 | case "https": 21 | return HTTPS; 22 | } 23 | throw new IllegalArgumentException("Scheme must be http or https."); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /urllibfuzz/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | urllib-parent 7 | org.urllib 8 | 0.8.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | urllib-fuzz 13 | 14 | 15 | 16 | org.urllib 17 | urllib 18 | ${project.version} 19 | test 20 | 21 | 22 | org.apache.commons 23 | commons-text 24 | 1.1 25 | test 26 | 27 | 28 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/authority/DnsTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import static org.hamcrest.CoreMatchers.containsString; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import org.junit.Test; 9 | 10 | public class DnsTest { 11 | 12 | @Test public void dontAllowLeadingDots() { 13 | assertInvalid(".example.com"); 14 | } 15 | 16 | @Test public void dontAllowEmptySegments() { 17 | assertInvalid("example..com"); 18 | } 19 | 20 | @Test public void convertToLowerCase() { 21 | assertEquals(Dns.parse("example.com"), Dns.parse("EXAMPLE.com")); 22 | } 23 | 24 | private void assertInvalid(String host) { 25 | try { 26 | Dns.parse(host); 27 | fail("Expected IllegalArgumentException for: " + host); 28 | } catch (IllegalArgumentException expected) { 29 | assertThat(expected.getMessage(), containsString("Invalid hostname")); 30 | } 31 | } 32 | } -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/authority/Port.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | public class Port { 4 | 5 | private static final int MIN_PORT = 1; 6 | private static final int MAX_PORT = (2 << 15) - 1; 7 | 8 | public static int validateOrThrow(String portString) { 9 | int port; 10 | try { 11 | port = Integer.parseInt(portString); 12 | } catch (NumberFormatException e) { 13 | throw portException(portString); 14 | } 15 | 16 | return validateOrThrow(port); 17 | } 18 | 19 | public static int validateOrThrow(int port) { 20 | if (port >= Port.MIN_PORT && port <= Port.MAX_PORT) { 21 | return port; 22 | } 23 | 24 | throw portException(String.valueOf(port)); 25 | } 26 | 27 | private static IllegalArgumentException portException(String portString) { 28 | return new IllegalArgumentException( 29 | String.format("Invalid port in authority. Valid values are [%d-%d] inclusive. Found: %s", 30 | MIN_PORT, MAX_PORT, portString)); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/authority/Dns.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import java.net.IDN; 5 | import java.util.Locale; 6 | import org.urllib.Host; 7 | import org.urllib.internal.CodepointMatcher; 8 | 9 | @AutoValue 10 | abstract class Dns implements Host { 11 | 12 | private static final CodepointMatcher DNS = CodepointMatcher.or( 13 | CodepointMatcher.ALPHANUMERIC, CodepointMatcher.anyOf("-.")); 14 | 15 | static Dns parse(String hostname) { 16 | int lastDot = -1; 17 | for (int i = 0; i < hostname.length(); i++) { 18 | char c = hostname.charAt(i); 19 | if (!DNS.matches(c)) { 20 | throw new InvalidHostException(hostname, i); 21 | } else if (c == '.') { 22 | if (lastDot == i - 1) { 23 | throw new InvalidHostException(hostname, i); 24 | } 25 | lastDot = i; 26 | } 27 | } 28 | String lower = hostname.toLowerCase(Locale.US); 29 | return new AutoValue_Dns(lower, IDN.toUnicode(lower)); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/Type.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | public enum Type { 4 | 5 | /** 6 | * Contains at least a protocol (scheme) and host. 7 | * 12 | */ 13 | FULL, 14 | 15 | 16 | /** 17 | * Contains everything from {@link Type#FULL}, except the protocol. 18 | * 19 | * 22 | */ 23 | PROTOCOL_RELATIVE, 24 | 25 | 26 | /** 27 | * Contains everything from {@link Type#FULL}, except the protocol. 28 | * 33 | */ 34 | PATH_ABSOLUTE, 35 | 36 | 37 | /** 38 | * Contains everything from {@link Type#FULL}, except the protocol. 39 | * 44 | */ 45 | PATH_RELATIVE, 46 | 47 | 48 | /** 49 | * Contains everything from {@link Type#FULL}, except the protocol. 50 | * 53 | */ 54 | FRAGMENT, 55 | } 56 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/StringsTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import static org.junit.Assert.assertArrayEquals; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertFalse; 6 | import static org.junit.Assert.assertTrue; 7 | 8 | import org.junit.Test; 9 | 10 | public class StringsTest { 11 | 12 | @Test public void codepoints() { 13 | // Two java characters collapse into a single codepoint 14 | assertArrayEquals(new int[]{65536}, Strings.codePoints("\uD800\uDC00")); 15 | assertArrayEquals(new int[]{9731}, Strings.codePoints("☃")); 16 | } 17 | 18 | @Test public void isNullOrEmpty() { 19 | assertTrue(Strings.isNullOrEmpty(null)); 20 | assertTrue(Strings.isNullOrEmpty("")); 21 | assertFalse(Strings.isNullOrEmpty("a")); 22 | } 23 | 24 | @Test public void santizeWhitespace() { 25 | assertEquals("", Strings.sanitizeWhitespace(" ")); 26 | assertEquals("a", Strings.sanitizeWhitespace(" a ")); 27 | assertEquals("ab", Strings.sanitizeWhitespace(" a\nb ")); 28 | assertEquals("ab", Strings.sanitizeWhitespace(" a\n b ")); 29 | assertEquals("ab", Strings.sanitizeWhitespace("a\n b")); 30 | assertEquals("ab c", Strings.sanitizeWhitespace("a\n b c")); 31 | assertEquals("ab c", Strings.sanitizeWhitespace("a\n b c")); 32 | assertEquals("a", Strings.sanitizeWhitespace("a\n ")); 33 | } 34 | 35 | } -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/Host.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | /** 4 | * A Url's host, with encoding for the network and encoding for display. 5 | * 6 | * 7 | * 8 | * 13 | * 14 | * 15 | * 16 | * 17 | * 18 | * 19 | * 20 | * 21 | * 22 | * 23 | * 24 | * 25 | * 26 | * 27 | * 28 | * 29 | * 30 | * 31 | * 32 | * 33 | * 34 | * 35 | * 36 | * 37 | *
Type 9 | * URL 10 | * Host.display() 11 | * Host.name() 12 | *
ASCII DNShttp://duckduckgo.com/duckduckgo.comduckduckgo.com
Internationalhttp://кот.ru/кот.ruxn--j1aim.ru
IPv4http://10.20.30.40/10.20.30.4010.20.30.40
IPv6http://[2001:db8::1:0:0:1]/2001:db8::1:0:0:1[2001:db8::1:0:0:1]
38 | * 39 | * @see RFC 3986#3.2.2 40 | */ 41 | public interface Host { 42 | 43 | /** 44 | * Returns the Url's host, encoded so that it can be passed to methods such as 45 | * {@link java.net.InetAddress#getByName(String)}. 46 | */ 47 | String name(); 48 | 49 | /** 50 | * Returns the Url's host, encoded so that it can be shown to a person (for example in a 51 | * browser's address bar). 52 | */ 53 | String display(); 54 | } 55 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/authority/Hosts.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import java.net.IDN; 4 | import javax.annotation.Nonnull; 5 | import org.urllib.Host; 6 | import org.urllib.internal.PercentDecoder; 7 | 8 | final class Hosts { 9 | 10 | @Nonnull static Host parse(String hostname) { 11 | String ascii = validateAndConvertToAscii(hostname); 12 | 13 | Host host; 14 | if (Ip6.isIpv6(ascii)) { 15 | host = Ip6.parse(ascii); 16 | } else { 17 | if (ascii.endsWith(".")) { 18 | ascii = ascii.substring(0, ascii.length() - 1); 19 | } 20 | if (Ip4.isIpv4(ascii)) { 21 | host = Ip4.parse(ascii); 22 | } else { 23 | host = Dns.parse(ascii); 24 | } 25 | } 26 | 27 | if (host == null) { 28 | throw new IllegalArgumentException("Invalid hostname: " + hostname); 29 | } 30 | 31 | return host; 32 | } 33 | 34 | private static String validateAndConvertToAscii(String hostname) { 35 | String ascii; 36 | try { 37 | ascii = IDN.toASCII(PercentDecoder.decodeUnreserved(hostname), IDN.ALLOW_UNASSIGNED); 38 | } catch (IllegalArgumentException e) { 39 | throw new IllegalArgumentException("Invalid hostname: " + hostname); 40 | } 41 | 42 | if (ascii.isEmpty() || ".".equals(ascii)) { 43 | throw new IllegalArgumentException("Invalid hostname: cannot be null or empty."); 44 | } 45 | 46 | return ascii; 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /urllibfuzz/src/test/java/org/urllib/UrlsFuzzTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import java.util.Random; 4 | import org.apache.commons.text.RandomStringGenerator; 5 | import org.junit.Test; 6 | 7 | public class UrlsFuzzTest { 8 | 9 | private static final Random RAND = new Random(); 10 | private static final int RUN_LEN = 10000; 11 | private static final RandomStringGenerator UNFILTERED = 12 | new RandomStringGenerator.Builder().build(); 13 | private static final RandomStringGenerator ASCII = 14 | new RandomStringGenerator.Builder().withinRange(0x00, 0x7F).build(); 15 | private static final RandomStringGenerator ASCII_PRINTABLE = 16 | new RandomStringGenerator.Builder().withinRange(0x20, 0x7E).build(); 17 | 18 | @Test public void minimalEscape() { 19 | for (int i = 0; i < RUN_LEN; i++) { 20 | int length = RAND.nextInt(20); 21 | Urls.escape("http://host.com/" + UNFILTERED.generate(length)); 22 | Urls.escape("http://host.com/" + ASCII.generate(length)); 23 | Urls.escape("http://host.com/" + ASCII_PRINTABLE.generate(length)); 24 | } 25 | } 26 | 27 | @Test public void createURI() { 28 | for (int i = 0; i < RUN_LEN; i++) { 29 | int length = RAND.nextInt(20); 30 | Urls.createURI("http://host.com/" + UNFILTERED.generate(length)); 31 | Urls.createURI("http://host.com/" + ASCII.generate(length)); 32 | Urls.createURI("http://host.com/" + ASCII_PRINTABLE.generate(length)); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/authority/HostsTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import static org.hamcrest.CoreMatchers.containsString; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import org.junit.Test; 9 | 10 | public class HostsTest { 11 | 12 | @Test public void allowTrailingDot() { 13 | assertEquals(Hosts.parse("example.com"), Hosts.parse("example.com.")); 14 | assertEquals(Hosts.parse("example"), Hosts.parse("example.")); 15 | assertEquals(Hosts.parse("1.1.1.1"), Hosts.parse("1.1.1.1.")); 16 | } 17 | 18 | @Test public void dontAllowEmptySegments() { 19 | assertInvalidHost(""); 20 | assertInvalidHost(" "); 21 | assertInvalidHost("."); 22 | assertInvalidHost("%2e"); 23 | assertInvalidHost(".."); 24 | assertInvalidHost("host..com"); 25 | assertInvalidHost("1.1..1.1"); 26 | 27 | assertInvalidHost(":"); 28 | assertInvalidHost("[]"); 29 | assertInvalidHost("[:]"); 30 | } 31 | 32 | @Test public void convertToLowerCase() { 33 | assertEquals(Hosts.parse("example.com"), Hosts.parse("Example.com")); 34 | assertEquals(Hosts.parse("ökonom.de"), Hosts.parse("Ökonom.de")); 35 | assertEquals(Hosts.parse("ли.ru"), Hosts.parse("Ли.ru")); 36 | } 37 | 38 | static void assertInvalidHost(String host) { 39 | try { 40 | Hosts.parse(host); 41 | fail("Expected IllegalArgumentException for: " + host); 42 | } catch (IllegalArgumentException expected) { 43 | assertThat(expected.getMessage(), containsString("Invalid hostname")); 44 | } 45 | } 46 | 47 | } -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/authority/Ip4.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import javax.annotation.Nullable; 5 | import org.urllib.Host; 6 | import org.urllib.internal.CodepointMatcher; 7 | 8 | @AutoValue 9 | abstract class Ip4 implements Host { 10 | 11 | @Nullable static Ip4 parse(String hostname) { 12 | String[] segments = hostname.split("\\."); 13 | if (segments.length != 4) { 14 | return null; 15 | } 16 | byte[] addr = new byte[4]; 17 | for (int i = 0; i < segments.length; i++) { 18 | int val; 19 | String segment = segments[i]; 20 | // Don't allow segments that start with zero, since 21 | // it's unclear whether they're octal. 22 | if (segment.length() > 1 && segment.startsWith("0")) { 23 | return null; 24 | } 25 | try { 26 | val = Integer.parseInt(segment); 27 | } catch (NumberFormatException e) { 28 | return null; 29 | } 30 | if (val < 0 || val > 255) { 31 | return null; 32 | } 33 | addr[i] = (byte) val; 34 | } 35 | return fromAddress(addr); 36 | } 37 | 38 | private static Ip4 fromAddress(byte[] addr) { 39 | String formatted = (addr[0] & 0xff) + "." + (addr[1] & 0xff) 40 | + "." + (addr[2] & 0xff) + "." + (addr[3] & 0xff); 41 | return new AutoValue_Ip4(formatted, formatted); 42 | } 43 | 44 | static boolean isIpv4(String hostname) { 45 | int dot = hostname.lastIndexOf('.'); 46 | if (dot == hostname.length() - 1) { 47 | return false; 48 | } 49 | // If a dot isn't found, then -1 is returned and we check the first character. 50 | return CodepointMatcher.DIGIT.matches(hostname.charAt(dot + 1)); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/PercentDecoder.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | public final class PercentDecoder { 4 | 5 | private PercentDecoder() {} 6 | 7 | public static String decodeAll(String str) { 8 | return decode(str, CodepointMatcher.ALL); 9 | } 10 | 11 | public static String decodeUnreserved(String str) { 12 | return decode(str, CodepointMatcher.UNRESERVED); 13 | } 14 | 15 | private static String decode(String str, CodepointMatcher decodeSet) { 16 | if (str.isEmpty()) return str; 17 | if (!requiresDecoding(str, decodeSet)) return str; 18 | 19 | byte[] bytes = str.getBytes(StandardCharsets.UTF_8); 20 | int p = 0; 21 | int i = 0; 22 | for (int end = bytes.length; i < end; i++) { 23 | byte b = bytes[i]; 24 | if (b == '%' && i < end - 2) { 25 | byte b1 = bytes[i + 1]; 26 | byte b2 = bytes[i + 2]; 27 | int decoded = Hex.decodeHex(b1, b2); 28 | if (decoded > -1 && decodeSet.matches(decoded)) { 29 | bytes[p++] = (byte) decoded; 30 | i += 2; 31 | continue; 32 | } 33 | } 34 | 35 | if (p != i) { 36 | bytes[p] = b; 37 | } 38 | p++; 39 | } 40 | return new String(bytes, 0, p, StandardCharsets.UTF_8); 41 | } 42 | 43 | private static boolean requiresDecoding(String str, CodepointMatcher decodeSet) { 44 | for (int i = 0; i < str.length() - 2; i++) { 45 | char c = str.charAt(i); 46 | if (c == '%') { 47 | char c1 = str.charAt(i + 1); 48 | char c2 = str.charAt(i +2); 49 | int decoded = Hex.decodeHex(c1, c2); 50 | if (decoded != -1 && decodeSet.matches(decoded)) { 51 | return true; 52 | } 53 | } 54 | } 55 | return false; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/Url.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import java.net.URI; 4 | import javax.annotation.Nonnegative; 5 | import javax.annotation.Nonnull; 6 | 7 | /** 8 | * A uniform resource locator (Url) that is 9 | * immutable, compliant with RFC 3986, and 10 | * interops with Java's {@link URI}. 11 | * 12 | * @since 1.0 13 | */ 14 | public interface Url { 15 | 16 | /** 17 | * Returns the Url's scheme in lowercase. 18 | * 19 | * @see RFC 3986#3.1 20 | */ 21 | @Nonnull String scheme(); 22 | 23 | /** 24 | * Returns the Url's host. 25 | * 26 | * @see RFC 3986#3.2.2 27 | */ 28 | @Nonnull Host host(); 29 | 30 | /** 31 | * Returns the Url's port. 32 | * 33 | * @see RFC 3986#3.2.3 34 | */ 35 | @Nonnegative int port(); 36 | 37 | /** 38 | * Returns the Url's path. 39 | * 40 | * @see RFC 3986#3.3 41 | */ 42 | @Nonnull Path path(); 43 | 44 | /** 45 | * Returns the Url's query. 46 | * 47 | * @see RFC 3986#3.4 48 | */ 49 | @Nonnull Query query(); 50 | 51 | /** 52 | * Returns the Url's fragment, not escaped. 53 | * 54 | * @see RFC 3986#3.5 55 | */ 56 | @Nonnull String fragment(); 57 | 58 | /** 59 | * Returns this URL as a {@link java.net.URI}. 60 | */ 61 | @Nonnull URI uri(); 62 | 63 | @Nonnull Url resolve(String reference); 64 | } 65 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/PercentDecoderTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertSame; 5 | 6 | import org.junit.Test; 7 | 8 | public class PercentDecoderTest { 9 | 10 | @Test public void allAscii() { 11 | StringBuilder expected = new StringBuilder(); 12 | StringBuilder encoded = new StringBuilder(); 13 | 14 | for (char c = 0; c < 0x80; c++) { 15 | expected.append(c); 16 | encoded.append(percentEncode(c)); 17 | } 18 | assertEquals(expected.toString(), PercentDecoder.decodeAll(encoded.toString())); 19 | } 20 | 21 | @Test public void allAsciiUnreserved() { 22 | StringBuilder expected = new StringBuilder(); 23 | StringBuilder encoded = new StringBuilder(); 24 | 25 | for (char c = 0; c < 0x80; c++) { 26 | String percent = percentEncode(c); 27 | if (".-_~".indexOf(c) > -1 || CodepointMatcher.ALPHANUMERIC.matches(c)) { 28 | expected.append(c); 29 | } else { 30 | expected.append(percent); 31 | } 32 | encoded.append(percent); 33 | } 34 | assertEquals(expected.toString(), PercentDecoder.decodeUnreserved(encoded.toString())); 35 | } 36 | 37 | @Test public void decodingSafelyHandlesMalformedPercents() { 38 | // Using assertSame to check that we don't allocate a new string 39 | // when decoding isn't possible. 40 | assertSame("%zz", PercentDecoder.decodeAll("%zz")); 41 | assertSame("%3", PercentDecoder.decodeAll("%3")); 42 | assertSame("%3z", PercentDecoder.decodeAll("%3z")); 43 | assertSame("%", PercentDecoder.decodeAll("%")); 44 | assertSame("%%2", PercentDecoder.decodeAll("%%2")); 45 | assertSame("%2%", PercentDecoder.decodeAll("%2%")); 46 | } 47 | 48 | private String percentEncode(int c) { 49 | return String.format("%%%02X", c); 50 | } 51 | } -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/authority/Authority.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import org.urllib.Host; 5 | 6 | @AutoValue 7 | public abstract class Authority { 8 | 9 | public abstract int port(); 10 | public abstract Host host(); 11 | 12 | public static Authority split(String authority) { 13 | int lastColon = -1; 14 | int numColons = 0; 15 | 16 | int start = authority.length(); 17 | int end = authority.length(); 18 | int port = -1; 19 | 20 | // Move p to the last @, or -1 if not found 21 | // Find the last colon 22 | // Count the number of colons found 23 | while (--start >= 0) { 24 | char b = authority.charAt(start); 25 | if (b == '@') { 26 | break; 27 | } else if (b == ':') { 28 | if (numColons++ == 0) { 29 | lastColon = start; 30 | } 31 | } 32 | } 33 | 34 | // Move p to the first character in the authority 35 | start++; 36 | 37 | if (start == end || start == lastColon) { 38 | throw new IllegalArgumentException("URL missing host. Input: " + authority); 39 | } 40 | 41 | if (numColons == 1) { 42 | port = parseAndValidatePort(authority, lastColon); 43 | end = lastColon; 44 | } else if (numColons > 1) { 45 | if (authority.charAt(lastColon - 1) == ']') { 46 | port = parseAndValidatePort(authority, lastColon); 47 | end = lastColon; 48 | } 49 | } 50 | 51 | return new AutoValue_Authority(port, Hosts.parse(authority.substring(start, end))); 52 | } 53 | 54 | @Override public String toString() { 55 | if (port() > 0) { 56 | return host().name() + ':' + port(); 57 | } else { 58 | return host().name(); 59 | } 60 | } 61 | 62 | private static int parseAndValidatePort(String authority, int lastColon) { 63 | return lastColon == authority.length() - 1 64 | ? -1 65 | : Port.validateOrThrow(authority.substring(lastColon + 1)); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/authority/Ip4Test.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import static org.hamcrest.CoreMatchers.containsString; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import org.junit.Test; 9 | 10 | public class Ip4Test { 11 | 12 | @Test public void removeTrailingPeriod() { 13 | assertEquals("255.255.255.255", Hosts.parse("255.255.255.255.").name()); 14 | } 15 | 16 | @Test public void percentDecodeBeforeParsing() { 17 | assertEquals("1.1.1.1", Hosts.parse("%31.%31.%31.%31").name()); 18 | assertEquals("1.1.1.1", Hosts.parse("1%2e1%2e1%2e1").name()); 19 | } 20 | 21 | @Test public void disambiguateUnicodeFirst() { 22 | assertEquals("1.1.1.1", Hosts.parse("1。1。1。1").name()); 23 | } 24 | 25 | @Test public void ipv4() { 26 | assertEquals("1.1.1.1", Hosts.parse("1.1.1.1").name()); 27 | assertEquals("0.0.0.1", Hosts.parse("0.0.0.1").name()); 28 | assertEquals("0.0.0.0", Hosts.parse("0.0.0.0").name()); 29 | assertEquals("255.255.255.255", Hosts.parse("255.255.255.255").name()); 30 | } 31 | 32 | @Test public void wrongNumberSegments() { 33 | assertInvalid("1"); 34 | assertInvalid("1.1"); 35 | assertInvalid("1.1.1"); 36 | assertInvalid("1.1.1.1.1"); 37 | } 38 | 39 | @Test public void outOfRange() { 40 | assertInvalid("-1.1.1.1"); 41 | assertInvalid("1.1.1.256"); 42 | assertInvalid("1.1.1.1000"); 43 | } 44 | 45 | @Test public void notAllNumeric() { 46 | assertInvalid("A.1.1.1"); 47 | assertInvalid("1.1.A.1"); 48 | } 49 | 50 | @Test public void dontAllowLeadingZero() { 51 | assertInvalid("4.89.8.05"); 52 | assertInvalid("01.1.1.1"); 53 | } 54 | 55 | 56 | private static void assertInvalid(String host) { 57 | try { 58 | Hosts.parse(host); 59 | fail("Expected IllegalArgumentException for: " + host); 60 | } catch (IllegalArgumentException expected) { 61 | assertThat(expected.getMessage(), containsString("Invalid hostname")); 62 | } 63 | } 64 | } -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/EncodeRules.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | public class EncodeRules { 4 | 5 | public static final String UNRESERVED = 6 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_~"; 7 | 8 | // ; Removed from RFC 3986 since some servers interpret semicolon as a path parameter. 9 | public static final String PATH = "!$&'()*+,:=@" + UNRESERVED; 10 | 11 | // ; Removed from RFC 3986 since some servers split the query on that. 12 | // + Removed from RFC 3986 since most servers change that to a space. 13 | public static final String QUERY = "!$'()*,/:?@" + UNRESERVED; 14 | public static final String FRAGMENT = "!$&'()*+,/:;=?@" + UNRESERVED; 15 | 16 | // RFC 3986 doesn't discuss 'unsafe' characters. The text below is from RFC 1738. 17 | // Although their unsafe category has evolved, the description is useful. 18 | // 19 | // Characters can be unsafe for a number of reasons. The space 20 | // character is unsafe because significant spaces may disappear and 21 | // insignificant spaces may be introduced when URLs are transcribed or 22 | // typeset or subjected to the treatment of word-processing programs. 23 | // The characters "<" and ">" are unsafe because they are used as the 24 | // delimiters around URLs in free text; the quote mark (""") is used to 25 | // delimit URLs in some systems. The character "#" is unsafe and should 26 | // always be encoded because it is used in World Wide Web and in other 27 | // systems to delimit a URL from a fragment/anchor identifier that might 28 | // follow it. The character "%" is unsafe because it is used for 29 | // encodings of other characters. Other characters are unsafe because 30 | // gateways and other transport agents are known to sometimes modify 31 | // such characters. These characters are "{", "}", "|", "\", "^", "~", 32 | // "[", "]", and "`". 33 | // 34 | // The unsafe category here includes characters that are not explicitly defined 35 | // in RFC 3986. They cannot represent data in a URL, and should be encoded prior 36 | // to writing to the network. 37 | public static final String UNSAFE = "\"%<>\\^`{|}"; 38 | 39 | } 40 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/Strings.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import java.util.Arrays; 4 | 5 | public final class Strings { 6 | 7 | private Strings() {} 8 | 9 | public static int[] codePoints(String s) { 10 | int length = s.length(); 11 | int arrayPointer = 0; 12 | int stringPointer = 0; 13 | int[] codepoints = new int[length]; 14 | 15 | while (stringPointer < length) { 16 | int codepoint = s.codePointAt(stringPointer); 17 | codepoints[arrayPointer++] = codepoint; 18 | stringPointer += Character.charCount(codepoint); 19 | } 20 | 21 | return arrayPointer == stringPointer 22 | ? codepoints 23 | : Arrays.copyOf(codepoints, arrayPointer); 24 | } 25 | 26 | public static boolean isNullOrEmpty(String s) { 27 | return s == null || s.isEmpty(); 28 | } 29 | 30 | public static String nullToEmpty(String value) { 31 | return value == null ? "" : value; 32 | } 33 | 34 | 35 | public static String sanitizeWhitespace(String str) { 36 | 37 | int start = 0; 38 | int end = str.length(); 39 | 40 | while (start < end && CodepointMatcher.ASCII_WHITESPACE.matches(str.charAt(start))) { 41 | start++; 42 | } 43 | 44 | if (start == end) { 45 | return ""; 46 | } 47 | 48 | while (end > start && CodepointMatcher.ASCII_WHITESPACE.matches(str.charAt(end - 1))) { 49 | end--; 50 | } 51 | 52 | int firstNewline = -1; 53 | 54 | for (int i = start; i < end; i++) { 55 | if (CodepointMatcher.ASCII_NEWLINE.matches(str.charAt(i))) { 56 | firstNewline = i; 57 | break; 58 | } 59 | } 60 | 61 | if (firstNewline == -1) { 62 | if (start == 0 && end == str.length()) { 63 | return str; 64 | } else { 65 | return str.substring(start, end); 66 | } 67 | } 68 | 69 | char[] chars = str.toCharArray(); 70 | 71 | int p = firstNewline; 72 | for (int i = firstNewline; i < end; i++) { 73 | if (CodepointMatcher.ASCII_NEWLINE.matches(str.charAt(i))) { 74 | i++; 75 | while (i < end && CodepointMatcher.ASCII_WHITESPACE.matches(str.charAt(i))) { 76 | i++; 77 | } 78 | } 79 | chars[p++] = str.charAt(i); 80 | } 81 | 82 | return new String(chars, start, p - start); 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/Path.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import java.util.List; 4 | import javax.annotation.Nonnull; 5 | 6 | /** 7 | * A hierarchical URL component that typically represents a location on a file system. 8 | * 9 | *

The following constraints exist: 10 | * 11 | *

17 | * 18 | *

As a result of these rules, all of the following yield the same path of {@code /a/b/}: 19 | * 20 | *

{@code
21 |  *   Path.of("a/b/");
22 |  *   Path.of("/a/b/");
23 |  *   Path.of("a", "b/");
24 |  *   Path.of("a", "b", "");
25 |  *   Path.of("\a\b\");
26 |  *   Path.of("/a/", "////b/");
27 |  * }
28 | */ 29 | public interface Path { 30 | 31 | /** 32 | * Returns {@code true} if the path is the root path. 33 | *
{@code
34 |    *   assertTrue(Path.of("").isEmpty());
35 |    *   assertTrue(Path.of("/").isEmpty());
36 |    *   assertFalse(Path.of("/a").isEmpty());
37 |    * }
38 | * 39 | *

The first and second example are true, and both equal to each other, 40 | * since this class enforces that all paths are absolute. 41 | */ 42 | boolean isEmpty(); 43 | 44 | /** 45 | * Returns all of the path segments, including the filename (if present). The segments 46 | * will not be percent encoded. 47 | * 48 | *

{@code
49 |    *   assertEquals(Arrays.asList("a"), Path.of("/a").segments());
50 |    *   assertEquals(Arrays.asList("a"), Path.of("/a/").segments());
51 |    *   assertEquals(Arrays.asList("a", "b"), Path.of("/a/b").segments());
52 |    *   assertEquals(Arrays.asList("a", "b"), Path.of("/a/b/").segments());
53 |    *   assertEquals(Arrays.asList("a", "b", "c"), Path.of("/a/b/c").segments());
54 |    * }
55 | */ 56 | @Nonnull List segments(); 57 | 58 | /** 59 | * Returns {@code true} if the path terminates in a forward slash. 60 | *
{@code
61 |    *   assertTrue(Path.of("/a/").isDirectory());
62 |    *   assertFalse(Path.of("/a").isDirectory());
63 |    * }
64 | */ 65 | boolean isDirectory(); 66 | 67 | /** 68 | * Returns the path's filename if present, otherwise the empty string. The result will not 69 | * be percent encoded. 70 | */ 71 | @Nonnull String filename(); 72 | 73 | @Nonnull String encoded(); 74 | 75 | @Nonnull Path resolve(String ref); 76 | } 77 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # urllib 2 | Urllib is a library that makes URL manipulation easy, fun, and safe! 3 | 4 | - [x] Zero extra dependencies. 5 | - [x] Supports Java 7+, Android 14+. 6 | - [x] Compliant with [RFC 3986](https://tools.ietf.org/html/rfc3986). 7 | - [x] Immutable and threadsafe. 8 | 9 | ```java 10 | System.out.println( 11 | Url.http("maps.google.com") 12 | .path("maps") 13 | .query("q", "Búðardalur") 14 | .create()); 15 | 16 | >> http://maps.google.com/maps?q=B%C3%BA%C3%B0ardalur 17 | 18 | System.out.println( 19 | Url.parse("https://www.wolframalpha.com/input/?i=%E2%88%9A-1") 20 | .query() 21 | .params()); 22 | 23 | >> {i=√-1} 24 | ``` 25 | 26 | ## We're in preview! 27 | 28 | Feel free to check out the code and give feedback! The first stable release will be 1.0. 29 | In the meantime, builds are available on [jitpack](https://jitpack.io/#org.urllib/urllib): 30 | 31 | ### Gradle 32 | 33 | ```gradle 34 | repositories { 35 | jcenter() 36 | maven { url 'https://jitpack.io' } 37 | } 38 | 39 | dependencies { 40 | compile 'org.urllib:urllib:master-SNAPSHOT' 41 | } 42 | ``` 43 | 44 | ### Maven 45 | 46 | ```maven 47 | 48 | 49 | jitpack.io 50 | https://jitpack.io 51 | 52 | 53 | 54 | 55 | 56 | org.urllib 57 | urllib 58 | master-SNAPSHOT 59 | 60 | 61 | ``` 62 | 63 | - [x] 0.1 64 | - Create a `Url` from scratch with builders. 65 | - Interop with `java.net.URI` 66 | - Support ASCII DNS hosts. 67 | - [x] 0.2 68 | - Expose component fields (scheme, host, path, etc..) via methods on the `Url` object. 69 | - [x] 0.3 70 | - Support IPv4 hosts. 71 | - [x] 0.4 72 | - Support IPv6 hosts. 73 | - [x] 0.5 74 | - Support IDN hosts. 75 | - [x] 0.6 76 | - Create a `Url` by parsing. 77 | - [x] 0.7 78 | - Utility method to create a `java.net.URI` from a previously-encoded `URL`. 79 | - [x] 0.8 80 | - Resolve a possibly-relative link against an existing `Url` 81 | - [ ] 0.9 82 | - Utility method to classify a potential URL. Is it junk? A protocol-relative URL? An absolute path? 83 | - [ ] 1.0 84 | - Encode a `Url` to display to users (like in a web browser URL bar) 85 | 86 | ## License 87 | [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) 88 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/QueryTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | 5 | import com.google.common.collect.ImmutableList; 6 | import com.google.common.collect.ImmutableMap; 7 | import java.util.Map; 8 | import org.junit.Test; 9 | import org.urllib.internal.Queries; 10 | 11 | public class QueryTest { 12 | 13 | @Test public void retainOrderIfSupportedByMap() { 14 | Map params = ImmutableMap.of( 15 | "a", "1", 16 | "b", "2" 17 | ); 18 | Query query = Queries.create(params); 19 | assertEquals( 20 | ImmutableList.of(Queries.create("a", "1"), Queries.create("b", "2")), 21 | query.params()); 22 | } 23 | 24 | @Test public void parsePlusIsSpace() { 25 | Query expected = Queries.create(ImmutableMap.of( 26 | "k1", " " 27 | )); 28 | assertEquals(expected, Queries.parse("k1= ")); 29 | assertEquals(expected, Queries.parse("k1=+")); 30 | assertEquals(expected, Queries.parse("k1=%20")); 31 | } 32 | 33 | @Test public void parseIncompleteKeyValuePairs() { 34 | Query expected = Queries.create(ImmutableMap.of( 35 | "k1", "", 36 | "k2", "" 37 | )); 38 | assertEquals(expected, Queries.parse("k1&k2")); 39 | assertEquals(expected, Queries.parse("k1=&k2")); 40 | assertEquals(expected, Queries.parse("k1&k2=")); 41 | assertEquals(expected, Queries.parse("k1&k2=")); 42 | assertEquals(expected, Queries.parse("k1=&k2=")); 43 | assertEquals(expected, Queries.parse("&k1=&k2=")); 44 | assertEquals(expected, Queries.parse("&k1&k2&")); 45 | } 46 | 47 | @Test public void parseRetainsOrderOfParams() { 48 | Query expected = Queries.create(ImmutableMap.of( 49 | "k1", "a", 50 | "k2", "b" 51 | )); 52 | assertEquals(expected, Queries.parse("k1=a&k2=b")); 53 | } 54 | 55 | @Test public void parseRemovesPercentEncodingAfterParsing() { 56 | Query expected = Queries.create(ImmutableMap.of( 57 | "k1", "a", 58 | "k2", "b" 59 | )); 60 | assertEquals(expected, Queries.parse("%6b1=a&%6b2=%62")); 61 | 62 | expected = Queries.create(ImmutableMap.of( 63 | "k1", "=" 64 | )); 65 | assertEquals(expected, Queries.parse("k1=%3d")); 66 | 67 | expected = Queries.create(ImmutableMap.of( 68 | "k1", "&" 69 | )); 70 | assertEquals(expected, Queries.parse("k1=%26")); 71 | } 72 | 73 | @Test public void parseRetainsInvalidEscapes() { 74 | Query expected = Queries.create(ImmutableMap.of( 75 | "k1", "%", 76 | "k2", "%z", 77 | "k3", "%zz" 78 | )); 79 | assertEquals(expected, Queries.parse("k1=%&k2=%z&k3=%zz")); 80 | } 81 | } -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 4.0.0 7 | 8 | org.urllib 9 | urllib-parent 10 | pom 11 | 0.8.0-SNAPSHOT 12 | urllib (Parent) 13 | A collection of URL manipulation tools for Java. 14 | https://github.com/EricEdens/urllib 15 | 16 | 17 | urllib 18 | urllibfuzz 19 | 20 | 21 | 22 | UTF-8 23 | 1.7 24 | 25 | 26 | 27 | http://github.com/EricEdens/urllib 28 | scm:git:git://github.com/EricEdens/urllib.git 29 | scm:git:ssh://git@github.com/EricEdens/urllib.git 30 | HEAD 31 | 32 | 33 | 34 | GitHub Issues 35 | http://github.com/EricEdens/urllib/issues 36 | 37 | 38 | 39 | 40 | Apache 2.0 41 | http://www.apache.org/licenses/LICENSE-2.0.txt 42 | 43 | 44 | 45 | 46 | 47 | junit 48 | junit 49 | 4.12 50 | test 51 | 52 | 53 | com.google.guava 54 | guava 55 | 23.3-jre 56 | test 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | org.apache.maven.plugins 65 | maven-compiler-plugin 66 | 3.7.0 67 | 68 | ${java.version} 69 | ${java.version} 70 | -Xlint:all 71 | true 72 | true 73 | 74 | 75 | 76 | 77 | maven-jar-plugin 78 | 3.0.2 79 | 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/UrlBuilder.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import java.util.Collections; 4 | import java.util.Map; 5 | import javax.annotation.Nonnull; 6 | import org.urllib.Urls.ImmutableUrl; 7 | import org.urllib.internal.Paths; 8 | import org.urllib.internal.Queries; 9 | import org.urllib.internal.Scheme; 10 | import org.urllib.internal.authority.Authority; 11 | import org.urllib.internal.authority.Port; 12 | 13 | /** 14 | * Use the builder to create a {@link Url} from scratch. For example, this code creates a search 15 | * for Wolfram Alpha using fancy unicode characters:
{@code
16 |  *
17 |  *   Url url = Urls.https("www.wolframalpha.com")
18 |  *                 .path("input/")
19 |  *                 .query("i", "π²")
20 |  *                 .create();
21 |  *
22 |  *   System.out.println(url);
23 |  * }
24 | * 25 | * which prints: 26 | * https://www.wolframalpha.com/input/?i=%CF%80%C2%B2 27 | * 28 | * @since 1.0 29 | */ 30 | public final class UrlBuilder { 31 | 32 | @Nonnull final Scheme scheme; 33 | int port = -1; 34 | @Nonnull final Authority authority; 35 | @Nonnull Path path = Paths.empty(); 36 | @Nonnull Query query = Queries.empty(); 37 | @Nonnull String fragment = ""; 38 | 39 | UrlBuilder(Url url) { 40 | this.scheme = Scheme.valueOf(url.scheme()); 41 | this.port = url.port(); 42 | this.authority = Authority.split(url.host().name()); 43 | this.path = url.path(); 44 | this.query = url.query(); 45 | this.fragment = url.fragment(); 46 | } 47 | 48 | UrlBuilder(@Nonnull Scheme scheme, @Nonnull String host) { 49 | this.scheme = scheme; 50 | this.authority = Authority.split(host); 51 | if (authority.port() != -1) { 52 | port(authority.port()); 53 | } 54 | } 55 | 56 | public UrlBuilder port(int port) { 57 | this.port = Port.validateOrThrow(port); 58 | return this; 59 | } 60 | 61 | public UrlBuilder path(String... splittableSegments) { 62 | this.path = Paths.of(splittableSegments); 63 | return this; 64 | } 65 | 66 | UrlBuilder path(Path path) { 67 | this.path = path; 68 | return this; 69 | } 70 | 71 | public UrlBuilder query(String key, String value) { 72 | this.query = Queries.create(Collections.singletonMap(key, value)); 73 | return this; 74 | } 75 | 76 | public UrlBuilder query(Map query) { 77 | this.query = Queries.create(query); 78 | return this; 79 | } 80 | 81 | UrlBuilder query(Query query) { 82 | this.query = query; 83 | return this; 84 | } 85 | 86 | public UrlBuilder fragment(String fragment) { 87 | this.fragment = fragment; 88 | return this; 89 | } 90 | 91 | public Url create() { 92 | if (this.port == -1) { 93 | this.port = scheme.defaultPort(); 94 | } 95 | return ImmutableUrl.create(scheme.name(), authority.host(), port, 96 | path, query, fragment, scheme.defaultPort()); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /urllib/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | urllib-parent 7 | org.urllib 8 | 0.8.0-SNAPSHOT 9 | 10 | 4.0.0 11 | urllib 12 | 13 | 14 | 15 | 16 | org.codehaus.mojo 17 | animal-sniffer-maven-plugin 18 | 1.16 19 | 20 | 21 | java17 22 | test 23 | 24 | 25 | org.codehaus.mojo.signature 26 | java17 27 | 1.0 28 | 29 | 30 | 31 | check 32 | 33 | 34 | 35 | android14 36 | test 37 | 38 | 39 | net.sf.androidscents.signature 40 | android-api-level-14 41 | 4.0_r4 42 | 43 | 44 | 45 | check 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-source-plugin 53 | 54 | 55 | attach-sources 56 | 57 | jar 58 | 59 | 60 | 61 | 62 | 63 | org.apache.maven.plugins 64 | maven-javadoc-plugin 65 | 66 | 67 | attach-javadocs 68 | 69 | jar 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | com.google.auto.value 80 | auto-value 81 | 1.5.2 82 | provided 83 | 84 | 85 | com.google.code.findbugs 86 | jsr305 87 | 3.0.2 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/authority/AuthorityTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import static org.hamcrest.CoreMatchers.containsString; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import org.junit.Test; 9 | 10 | public class AuthorityTest { 11 | 12 | @Test public void ipv4() { 13 | assertEquals("1.1.1.1", 14 | Authority.split("1.1.1.1").host().name()); 15 | assertEquals("192.168.1.1", 16 | Authority.split("user:password@192.168.1.1").host().name()); 17 | } 18 | 19 | @Test public void ipv4_removeUpToOneTrailingDotInHost() { 20 | assertEquals("192.168.1.1", 21 | Authority.split("192.168.1.1.").host().name()); 22 | } 23 | 24 | @Test public void supportUnicodePeriods() { 25 | assertEquals("host.com", Authority.split("host。com").host().display()); 26 | assertEquals("host.com", Authority.split("host.com").host().display()); 27 | assertEquals("host.com", Authority.split("host。com").host().display()); 28 | } 29 | 30 | @Test public void failWhenHostIsEmpty() { 31 | expectUrlException("", "missing host"); 32 | expectUrlException(":", "missing host"); 33 | expectUrlException("user@host.com@", "missing host"); 34 | } 35 | 36 | @Test public void removeUserInfo() { 37 | assertEquals("host.com", Authority.split("user@host.com").host().display()); 38 | assertEquals("host.com", Authority.split("user@host.com@host.com").host().display()); 39 | assertEquals("host.com", Authority.split("user@host.com@host.com:80").host().display()); 40 | } 41 | 42 | @Test public void rejectInvalidPorts() { 43 | expectInvalidPortException("host.com:ab"); 44 | expectInvalidPortException("host.com:65536"); 45 | expectInvalidPortException("host.com:0"); 46 | expectInvalidPortException("host.com:-1"); 47 | } 48 | 49 | @Test public void trimLeadingZeroesInPort() { 50 | assertEquals(1, Authority.split("h:000000001").port()); 51 | assertEquals(10, Authority.split("h:010").port()); 52 | assertEquals(65535, Authority.split("h:00065535").port()); 53 | } 54 | 55 | @Test public void defaultPortIsMinusOne() { 56 | assertEquals(-1, Authority.split("h").port()); 57 | assertEquals(-1, Authority.split("h:").port()); 58 | } 59 | 60 | private void expectUrlException(String str, String msg) { 61 | try { 62 | Authority authority = Authority.split(str); 63 | fail("Expected UrlException; result was: " + authority); 64 | } catch (IllegalArgumentException expected) { 65 | assertThat(expected.getMessage(), containsString(msg)); 66 | } 67 | } 68 | 69 | private void expectInvalidPortException(String str) { 70 | try { 71 | Authority authority = Authority.split(str); 72 | fail("Expected IllegalArgumentException; result was: " + authority); 73 | } catch (IllegalArgumentException expected) { 74 | assertThat(expected.getMessage(), containsString("Invalid port in authority.")); 75 | } 76 | } 77 | } -------------------------------------------------------------------------------- /urllib/src/main/resources/encode-set.tsv: -------------------------------------------------------------------------------- 1 | char rep path query fragment 2 | 0x00 ␀ PERC PERC PERC 3 | 0x01 ␁ PERC PERC PERC 4 | 0x02 ␂ PERC PERC PERC 5 | 0x03 ␃ PERC PERC PERC 6 | 0x04 ␄ PERC PERC PERC 7 | 0x05 ␅ PERC PERC PERC 8 | 0x06 ␆ PERC PERC PERC 9 | 0x07 ␇ PERC PERC PERC 10 | 0x08 ␈ PERC PERC PERC 11 | 0x09 ␉ PERC PERC PERC 12 | 0x0A ␊ PERC PERC PERC 13 | 0x0B ␋ PERC PERC PERC 14 | 0x0C ␌ PERC PERC PERC 15 | 0x0D ␍ PERC PERC PERC 16 | 0x0E ␎ PERC PERC PERC 17 | 0x0F ␏ PERC PERC PERC 18 | 0x10 ␐ PERC PERC PERC 19 | 0x11 ␑ PERC PERC PERC 20 | 0x12 ␒ PERC PERC PERC 21 | 0x13 ␓ PERC PERC PERC 22 | 0x14 ␔ PERC PERC PERC 23 | 0x15 ␕ PERC PERC PERC 24 | 0x16 ␖ PERC PERC PERC 25 | 0x17 ␗ PERC PERC PERC 26 | 0x18 ␘ PERC PERC PERC 27 | 0x19 ␙ PERC PERC PERC 28 | 0x1A ␚ PERC PERC PERC 29 | 0x1B ␛ PERC PERC PERC 30 | 0x1C ␜ PERC PERC PERC 31 | 0x1D ␝ PERC PERC PERC 32 | 0x1E ␞ PERC PERC PERC 33 | 0x1F ␟ PERC PERC PERC 34 | 0x20 ␠ PERC PERC PERC 35 | 0x21 ! NONE NONE NONE 36 | 0x22 " PERC PERC PERC 37 | 0x23 # PERC PERC PERC 38 | 0x24 $ NONE NONE NONE 39 | 0x25 % PERC PERC PERC 40 | 0x26 & NONE PERC NONE 41 | 0x27 ' NONE NONE NONE 42 | 0x28 ( NONE NONE NONE 43 | 0x29 ) NONE NONE NONE 44 | 0x2A * NONE NONE NONE 45 | 0x2B + NONE PERC NONE 46 | 0x2C , NONE NONE NONE 47 | 0x2D - NONE NONE NONE 48 | 0x2E . NONE NONE NONE 49 | 0x2F / SLSH NONE NONE 50 | 0x30 0 NONE NONE NONE 51 | 0x31 1 NONE NONE NONE 52 | 0x32 2 NONE NONE NONE 53 | 0x33 3 NONE NONE NONE 54 | 0x34 4 NONE NONE NONE 55 | 0x35 5 NONE NONE NONE 56 | 0x36 6 NONE NONE NONE 57 | 0x37 7 NONE NONE NONE 58 | 0x38 8 NONE NONE NONE 59 | 0x39 9 NONE NONE NONE 60 | 0x3A : NONE NONE NONE 61 | 0x3B ; PERC PERC NONE 62 | 0x3C < PERC PERC PERC 63 | 0x3D = NONE PERC NONE 64 | 0x3E > PERC PERC PERC 65 | 0x3F ? PERC NONE NONE 66 | 0x40 @ NONE NONE NONE 67 | 0x41 A NONE NONE NONE 68 | 0x42 B NONE NONE NONE 69 | 0x43 C NONE NONE NONE 70 | 0x44 D NONE NONE NONE 71 | 0x45 E NONE NONE NONE 72 | 0x46 F NONE NONE NONE 73 | 0x47 G NONE NONE NONE 74 | 0x48 H NONE NONE NONE 75 | 0x49 I NONE NONE NONE 76 | 0x4A J NONE NONE NONE 77 | 0x4B K NONE NONE NONE 78 | 0x4C L NONE NONE NONE 79 | 0x4D M NONE NONE NONE 80 | 0x4E N NONE NONE NONE 81 | 0x4F O NONE NONE NONE 82 | 0x50 P NONE NONE NONE 83 | 0x51 Q NONE NONE NONE 84 | 0x52 R NONE NONE NONE 85 | 0x53 S NONE NONE NONE 86 | 0x54 T NONE NONE NONE 87 | 0x55 U NONE NONE NONE 88 | 0x56 V NONE NONE NONE 89 | 0x57 W NONE NONE NONE 90 | 0x58 X NONE NONE NONE 91 | 0x59 Y NONE NONE NONE 92 | 0x5A Z NONE NONE NONE 93 | 0x5B [ PERC PERC PERC 94 | 0x5C \ SLSH PERC PERC 95 | 0x5D ] PERC PERC PERC 96 | 0x5E ^ PERC PERC PERC 97 | 0x5F _ NONE NONE NONE 98 | 0x60 ` PERC PERC PERC 99 | 0x61 a NONE NONE NONE 100 | 0x62 b NONE NONE NONE 101 | 0x63 c NONE NONE NONE 102 | 0x64 d NONE NONE NONE 103 | 0x65 e NONE NONE NONE 104 | 0x66 f NONE NONE NONE 105 | 0x67 g NONE NONE NONE 106 | 0x68 h NONE NONE NONE 107 | 0x69 i NONE NONE NONE 108 | 0x6A j NONE NONE NONE 109 | 0x6B k NONE NONE NONE 110 | 0x6C l NONE NONE NONE 111 | 0x6D m NONE NONE NONE 112 | 0x6E n NONE NONE NONE 113 | 0x6F o NONE NONE NONE 114 | 0x70 p NONE NONE NONE 115 | 0x71 q NONE NONE NONE 116 | 0x72 r NONE NONE NONE 117 | 0x73 s NONE NONE NONE 118 | 0x74 t NONE NONE NONE 119 | 0x75 u NONE NONE NONE 120 | 0x76 v NONE NONE NONE 121 | 0x77 w NONE NONE NONE 122 | 0x78 x NONE NONE NONE 123 | 0x79 y NONE NONE NONE 124 | 0x7A z NONE NONE NONE 125 | 0x7B { PERC PERC PERC 126 | 0x7C | PERC PERC PERC 127 | 0x7D } PERC PERC PERC 128 | 0x7E ~ NONE NONE NONE 129 | 0x7F ␡ PERC PERC PERC -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/Queries.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import java.util.ArrayList; 5 | import java.util.Collections; 6 | import java.util.HashMap; 7 | import java.util.Iterator; 8 | import java.util.LinkedList; 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.Map.Entry; 12 | import org.urllib.Query; 13 | import org.urllib.Query.KeyValue; 14 | 15 | public class Queries { 16 | 17 | private static final Query empty = of(Collections.emptyList()); 18 | 19 | public static Query create(Map paramMap) { 20 | List params = new ArrayList<>(paramMap.size()); 21 | for (Entry param : paramMap.entrySet()) { 22 | params.add(create(param.getKey(), param.getValue())); 23 | } 24 | return of(params); 25 | } 26 | 27 | public static Query empty() { 28 | return empty; 29 | } 30 | 31 | public static Query of(List params) { 32 | return ImmutableQuery.create(params); 33 | } 34 | 35 | public static Query parse(String query) { 36 | if (query.isEmpty()) return empty(); 37 | query = query.replace('+', ' '); 38 | List params = new LinkedList<>(); 39 | int p = 0; 40 | int equal = -1; 41 | for (int i = 0; i <= query.length(); i++) { 42 | if (i == query.length() || query.charAt(i) == '&') { 43 | if (i == p) { 44 | } else if (equal == -1) { 45 | String key = PercentDecoder.decodeAll(query.substring(p, i)); 46 | params.add(create(key, "")); 47 | } else { 48 | String key = (p == equal) 49 | ? "" 50 | : PercentDecoder.decodeAll(query.substring(p, equal)); 51 | String value = (i == equal + 1) 52 | ? "" 53 | : PercentDecoder.decodeAll(query.substring(equal + 1, i)); 54 | params.add(create(key, value)); 55 | } 56 | equal = -1; 57 | p = i + 1; 58 | } else if (query.charAt(i) == '=' && equal == -1) { 59 | equal = i; 60 | } 61 | } 62 | return of(params); 63 | } 64 | 65 | public static KeyValue create(String key, String value) { 66 | return new AutoValue_Queries_ImmutableKeyValue(key, Strings.nullToEmpty(value)); 67 | } 68 | 69 | @AutoValue 70 | abstract static class ImmutableKeyValue implements KeyValue { 71 | } 72 | 73 | @AutoValue 74 | abstract static class ImmutableQuery implements Query { 75 | 76 | static Query create(List params) { 77 | return new AutoValue_Queries_ImmutableQuery( 78 | Collections.unmodifiableList(params), toMap(params), encode(params)); 79 | } 80 | 81 | private static Map toMap(List params) { 82 | Map map = new HashMap<>(); 83 | for (KeyValue keyValue : params) { 84 | if (!map.containsKey(keyValue.key())) { 85 | map.put(keyValue.key(), keyValue.value()); 86 | } 87 | } 88 | return Collections.unmodifiableMap(map); 89 | } 90 | 91 | private static String encode(List params) { 92 | StringBuilder sb = new StringBuilder(); 93 | for (Iterator iterator = params.iterator(); iterator.hasNext(); ) { 94 | KeyValue param = iterator.next(); 95 | sb.append(PercentEncoder.encodeQueryComponentNoPlusForSpace(param.key())); 96 | if (param.value() != null && !param.value().isEmpty()) { 97 | sb.append('=') 98 | .append(PercentEncoder.encodeQueryComponentNoPlusForSpace(param.value())); 99 | } 100 | if (iterator.hasNext()) { 101 | sb.append('&'); 102 | } 103 | } 104 | return sb.toString(); 105 | } 106 | 107 | @Override public boolean isEmpty() { 108 | return params().isEmpty(); 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/CodepointMatcher.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import java.util.Arrays; 4 | import javax.annotation.Nonnull; 5 | 6 | public abstract class CodepointMatcher { 7 | 8 | public abstract boolean matches(int codepoint); 9 | 10 | public String trim(@Nonnull String str) { 11 | if (str.isEmpty()) return str; 12 | int start = 0; 13 | int end = str.length(); 14 | while (start < end && matches(str.charAt(start))) { 15 | start++; 16 | } 17 | while (start < end && matches(str.charAt(end - 1))) { 18 | end--; 19 | } 20 | return start == 0 && end == str.length() 21 | ? str 22 | : str.substring(start, end); 23 | } 24 | 25 | public boolean matches(char c) { 26 | return matches((int) c); 27 | } 28 | 29 | 30 | public static final CodepointMatcher NONE = new CodepointMatcher() { 31 | @Override public boolean matches(int codepoint) { 32 | return false; 33 | } 34 | }; 35 | 36 | public static final CodepointMatcher ALL = new CodepointMatcher() { 37 | @Override public boolean matches(int codepoint) { 38 | return true; 39 | } 40 | }; 41 | 42 | // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:White_Space=Yes:] 43 | public static final CodepointMatcher UNICODE_WHITESPACE = anyOf( 44 | 0x0009, 45 | 0x000A, 46 | 0x000B, 47 | 0x000C, 48 | 0x000D, 49 | 0x0020, 50 | 0x0085, 51 | 0x00A0, 52 | 0x1680, 53 | 0x2000, 54 | 0x2001, 55 | 0x2002, 56 | 0x2003, 57 | 0x2004, 58 | 0x2005, 59 | 0x2006, 60 | 0x2007, 61 | 0x2008, 62 | 0x2009, 63 | 0x200A, 64 | 0x2028, 65 | 0x2029, 66 | 0x202F, 67 | 0x205F, 68 | 0x3000); 69 | 70 | public static final CodepointMatcher ASCII_WHITESPACE = anyOf( 71 | 0x0009, 72 | 0x000A, 73 | 0x000C, 74 | 0x000D, 75 | 0x0020); 76 | 77 | public static final CodepointMatcher ASCII_NEWLINE = anyOf('\n', '\r'); 78 | 79 | public static final CodepointMatcher ALPHA = new CodepointMatcher() { 80 | @Override public boolean matches(int codepoint) { 81 | return codepoint >= 'a' && codepoint <= 'z' 82 | || codepoint >= 'A' && codepoint <= 'Z'; 83 | } 84 | }; 85 | 86 | public static final CodepointMatcher DIGIT = new CodepointMatcher() { 87 | @Override public boolean matches(int codepoint) { 88 | return codepoint >= '0' && codepoint <= '9'; 89 | } 90 | }; 91 | 92 | public static final CodepointMatcher HEX = new CodepointMatcher() { 93 | @Override public boolean matches(int codepoint) { 94 | return codepoint >= '0' && codepoint <= '9' 95 | || codepoint >= 'a' && codepoint <= 'f' 96 | || codepoint >= 'A' && codepoint <= 'F'; 97 | } 98 | }; 99 | 100 | public static final CodepointMatcher ALPHANUMERIC = or(ALPHA, DIGIT); 101 | 102 | public static final CodepointMatcher UNRESERVED = new CodepointMatcher() { 103 | @Override public boolean matches(int codepoint) { 104 | return codepoint >= 'a' && codepoint <= 'z' 105 | || codepoint >= 'A' && codepoint <= 'Z' 106 | || codepoint >= '0' && codepoint <= '9' 107 | || codepoint == '-' 108 | || codepoint == '.' 109 | || codepoint == '_' 110 | || codepoint == '~'; 111 | } 112 | }; 113 | 114 | public static CodepointMatcher or(final CodepointMatcher one, final CodepointMatcher two) { 115 | return new CodepointMatcher() { 116 | @Override public boolean matches(int codepoint) { 117 | return one.matches(codepoint) || two.matches(codepoint); 118 | } 119 | }; 120 | } 121 | 122 | public static CodepointMatcher anyOf(String str) { 123 | return anyOf(Strings.codePoints(str)); 124 | } 125 | 126 | public static CodepointMatcher anyOf(final int... codepoints) { 127 | Arrays.sort(codepoints); 128 | return new CodepointMatcher() { 129 | @Override public boolean matches(int codepoint) { 130 | return Arrays.binarySearch(codepoints, codepoint) > -1; 131 | } 132 | }; 133 | } 134 | 135 | public static CodepointMatcher or(final char c1, final char c2) { 136 | return new CodepointMatcher() { 137 | @Override public boolean matches(int codepoint) { 138 | return (codepoint == c1) || (codepoint == c2); 139 | } 140 | }; 141 | } 142 | 143 | public boolean matchesAnyOf(String str) { 144 | for (int stringPointer = 0; stringPointer < str.length(); ) { 145 | int codepoint = str.codePointAt(stringPointer); 146 | if (matches(codepoint)) { 147 | return true; 148 | } 149 | stringPointer += Character.charCount(codepoint); 150 | } 151 | return false; 152 | } 153 | 154 | } 155 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/TestEncodingRules.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertNotEquals; 5 | 6 | import com.google.common.base.Function; 7 | import java.io.IOException; 8 | import java.nio.file.Paths; 9 | import java.util.Scanner; 10 | import org.junit.BeforeClass; 11 | import org.junit.Test; 12 | import org.urllib.internal.EncodeRules; 13 | 14 | public class TestEncodingRules { 15 | 16 | private static final Rule[] PATH = new Rule[0x80]; 17 | private static final Rule[] QUERY = new Rule[0x80]; 18 | private static final Rule[] FRAGMENT = new Rule[0x80]; 19 | 20 | @BeforeClass 21 | public static void importRules() throws IOException { 22 | Scanner scanner = new Scanner(Paths.get("src/main/resources", "encode-set.tsv")); 23 | // Throw away header. 24 | scanner.nextLine(); 25 | while (scanner.hasNextLine()) { 26 | Integer point = Integer.decode(scanner.next()); 27 | String rep = scanner.next(); 28 | PATH[point] = Rule.valueOf(scanner.next()); 29 | QUERY[point] = Rule.valueOf(scanner.next()); 30 | FRAGMENT[point] = Rule.valueOf(scanner.next()); 31 | } 32 | } 33 | 34 | @Test public void pathObjectFromUrlBuilder() { 35 | Function codepoint = new Function() { 36 | @Override public String apply(String codepoint) { 37 | Url url = Urls.http("host.com").path("_" + codepoint).create(); 38 | String path = url.path().encoded(); 39 | return path.substring(2, path.length()); 40 | } 41 | }; 42 | run(codepoint, PATH); 43 | } 44 | 45 | @Test public void pathFromUrlBuilder() { 46 | Function codepoint = new Function() { 47 | @Override public String apply(String codepoint) { 48 | Url url = Urls.http("host.com").path("_" + codepoint).create(); 49 | return url.toString().replace("http://host.com/_", ""); 50 | } 51 | }; 52 | run(codepoint, PATH); 53 | } 54 | 55 | @Test public void queryObjectFromUrlBuilder() { 56 | Function codepoint = new Function() { 57 | @Override public String apply(String codepoint) { 58 | Url url = Urls.http("host.com").query("key", codepoint).create(); 59 | return url.query().encoded().replace("key=", ""); 60 | } 61 | }; 62 | run(codepoint, QUERY); 63 | } 64 | 65 | @Test public void queryFromUrlBuilder() { 66 | Function codepoint = new Function() { 67 | @Override public String apply(String codepoint) { 68 | Url url = Urls.http("host.com").query("key", codepoint).create(); 69 | return url.toString().replace("http://host.com/?key=", ""); 70 | } 71 | }; 72 | run(codepoint, QUERY); 73 | } 74 | 75 | @Test public void fragmentFromBuilder() { 76 | Function codepoint = new Function() { 77 | @Override public String apply(String codepoint) { 78 | Url url = Urls.http("host.com").fragment(codepoint).create(); 79 | return url.toString().replace("http://host.com/#", ""); 80 | } 81 | }; 82 | run(codepoint, FRAGMENT); 83 | } 84 | 85 | @Test public void unsafeShouldAlwaysBeEncoded() { 86 | for (int i = 0; i < EncodeRules.UNSAFE.length(); i++) { 87 | int codepoint = EncodeRules.UNSAFE.codePointAt(i); 88 | if (codepoint == '\\') { 89 | assertNotEquals(Rule.NONE, PATH[codepoint]); 90 | } else { 91 | assertEquals(Rule.PERC, PATH[codepoint]); 92 | } 93 | 94 | assertEquals(Rule.PERC, QUERY[codepoint]); 95 | assertEquals(Rule.PERC, FRAGMENT[codepoint]); 96 | } 97 | } 98 | 99 | @Test public void unreservedShouldNeverBeEncoded() { 100 | for (int i = 0; i < EncodeRules.UNRESERVED.length(); i++) { 101 | int codepoint = EncodeRules.UNRESERVED.codePointAt(i); 102 | assertEquals(Rule.NONE, PATH[codepoint]); 103 | assertEquals(Rule.NONE, QUERY[codepoint]); 104 | assertEquals(Rule.NONE, FRAGMENT[codepoint]); 105 | } 106 | } 107 | 108 | private void run(Function codepoint, Rule[] path) { 109 | for (char c = 0; c < 0x80; c++) { 110 | String expected; 111 | switch (path[c]) { 112 | case NONE: 113 | expected = "" + c; 114 | break; 115 | case PERC: 116 | expected = String.format("%%%02X", (int) c); 117 | break; 118 | case SLSH: 119 | expected = "/"; 120 | break; 121 | case PLUS: 122 | expected = "+"; 123 | break; 124 | default: 125 | throw new AssertionError(path[c]); 126 | } 127 | String actual = codepoint.apply("" + c); 128 | assertEquals(expected, actual); 129 | } 130 | 131 | } 132 | 133 | private enum Rule { 134 | NONE, PERC, SLSH, PLUS 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/SplitUrl.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import java.util.Locale; 5 | import javax.annotation.Nullable; 6 | 7 | @AutoValue 8 | public abstract class SplitUrl { 9 | 10 | private static final CodepointMatcher slash = CodepointMatcher.or('/', '\\'); 11 | 12 | @Nullable public abstract String scheme(); 13 | @Nullable public abstract String authority(); 14 | @Nullable public abstract String path(); 15 | @Nullable public abstract String query(); 16 | @Nullable public abstract String fragment(); 17 | @Nullable public abstract Type urlType(); 18 | 19 | public static SplitUrl split(String url) { 20 | 21 | String trim = CodepointMatcher.ASCII_WHITESPACE.trim(url); 22 | 23 | if (trim.isEmpty()) { 24 | return SplitUrl.builder() 25 | .path("") 26 | .urlType(Type.PATH_RELATIVE) 27 | .build(); 28 | } 29 | 30 | Builder builder; 31 | if (CodepointMatcher.ALPHA.matches(trim.charAt(0))) { 32 | builder = fullUrlOrRelativePath(trim); 33 | } else if (slash.matches(trim.charAt(0))) { 34 | if (trim.length() == 2 && slash.matches(trim.charAt(1))) { 35 | return SplitUrl.builder() 36 | .urlType(Type.PROTOCOL_RELATIVE) 37 | .build(); 38 | } else if (trim.length() > 1 && slash.matches(trim.charAt(1))) { 39 | builder = authority(trim, 0).urlType(Type.PROTOCOL_RELATIVE); 40 | } else { 41 | builder = path(trim, 0).urlType(Type.PATH_ABSOLUTE); 42 | } 43 | } else if (trim.charAt(0) == '#') { 44 | builder = fragment(trim, 0).urlType(Type.FRAGMENT); 45 | } else { 46 | builder = path(trim, 0).urlType(Type.PATH_RELATIVE); 47 | } 48 | return builder.build(); 49 | } 50 | 51 | private static Builder fullUrlOrRelativePath(String url) { 52 | for (int i = 1; i < url.length(); i++) { 53 | char c = url.charAt(i); 54 | if (CodepointMatcher.ALPHANUMERIC.matches(c) || c == '+' || c == '-' || c == '.') { 55 | continue; 56 | } else if (c == ':') { 57 | return authority(url, i + 1) 58 | .scheme(url.substring(0, i).toLowerCase(Locale.US)) 59 | .urlType(Type.FULL); 60 | } else { 61 | break; 62 | } 63 | } 64 | 65 | return path(url, 0) 66 | .urlType(Type.PATH_RELATIVE); 67 | } 68 | 69 | private static Builder authority(String url, int start) { 70 | 71 | while (start < url.length() && slash.matches(url.charAt(start))) { 72 | start++; 73 | } 74 | 75 | if (start >= url.length()) { 76 | throw new IllegalArgumentException("URL missing host name: " + url); 77 | } 78 | 79 | Builder builder = null; 80 | int i; 81 | for (i = start; i < url.length(); i++) { 82 | char c = url.charAt(i); 83 | if (c == '/' || c == '\\') { 84 | builder = path(url, i); 85 | break; 86 | } else if (c == '?') { 87 | builder = query(url, i); 88 | break; 89 | } else if (c == '#') { 90 | builder = fragment(url, i); 91 | break; 92 | } 93 | } 94 | 95 | if (builder == null) { 96 | builder = builder(); 97 | } 98 | 99 | return builder.authority(url.substring(start, i)); 100 | } 101 | 102 | private static Builder path(String url, int start) { 103 | Builder builder = null; 104 | int i; 105 | loop: 106 | for (i = start; i < url.length(); i++) { 107 | char c = url.charAt(i); 108 | if (c == '?') { 109 | builder = query(url, i); 110 | break; 111 | } else if (c == '#') { 112 | builder = fragment(url, i); 113 | break; 114 | } 115 | } 116 | 117 | if (builder == null) { 118 | builder = builder(); 119 | } 120 | 121 | if (i > start) { 122 | builder.path(url.substring(start, i)); 123 | } 124 | 125 | return builder; 126 | } 127 | 128 | private static Builder query(String url, int start) { 129 | Builder builder = null; 130 | int i; 131 | start++; 132 | for (i = start; i < url.length(); i++) { 133 | if (url.charAt(i) == '#') { 134 | builder = fragment(url, i); 135 | break; 136 | } 137 | } 138 | 139 | if (builder == null) { 140 | builder = builder(); 141 | } 142 | 143 | return builder.query(url.substring(start, i)); 144 | } 145 | 146 | private static Builder fragment(String url, int start) { 147 | return builder().fragment(url.substring(start + 1)); 148 | } 149 | 150 | static Builder builder() { 151 | return new AutoValue_SplitUrl.Builder(); 152 | } 153 | 154 | @AutoValue.Builder 155 | abstract static class Builder { 156 | 157 | public abstract Builder scheme(String scheme); 158 | public abstract Builder authority(String authority); 159 | public abstract Builder path(String path); 160 | public abstract Builder query(String query); 161 | public abstract Builder fragment(String fragment); 162 | public abstract Builder urlType(Type type); 163 | public abstract SplitUrl build(); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/authority/Ip6.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import org.urllib.Host; 5 | 6 | @AutoValue 7 | abstract class Ip6 implements Host { 8 | 9 | // There are 16 bytes in Ipv6; we track 10 | // the address as 8 shorts. 11 | private static final int ADDRLEN = 8; 12 | 13 | static Ip6 parse(String ip) { 14 | 15 | // 1. Trim brackets, if present. 16 | int start = 0; 17 | int end = ip.length(); 18 | if (ip.startsWith("[")) { 19 | if (!ip.endsWith("]")) { 20 | return null; 21 | } 22 | start++; 23 | end--; 24 | } 25 | 26 | // 2. Short circuit for addresses that 27 | // are empty or too small. 28 | switch (end - start) { 29 | case 0: 30 | case 1: 31 | return null; 32 | case 2: 33 | return "::".equals(ip.substring(start, end)) 34 | ? fromAddress(new int[8]) 35 | : null; 36 | default: 37 | } 38 | 39 | int[] addr = new int[ADDRLEN]; 40 | int addrPointer = 0; 41 | int compressionStarts = -1; 42 | 43 | // 3. Ensure that a leading colon means 44 | // there are exactly two colons, in 45 | // which case the address is compressed 46 | // at the front. 47 | if (ip.charAt(start) == ':') { 48 | if (ip.charAt(start + 1) != ':') { 49 | return null; 50 | } 51 | if (ip.charAt(start + 2) == ':') { 52 | return null; 53 | } 54 | compressionStarts = 0; 55 | start += 2; 56 | } 57 | 58 | // 4. Split at each segment, 59 | // interpreting the character 60 | // hex values. 61 | for (int i = start; i < end; i++) { 62 | if (addrPointer == ADDRLEN) { 63 | return null; 64 | } 65 | if (ip.charAt(i) == ':') { 66 | if (compressionStarts != -1) { 67 | return null; 68 | } 69 | compressionStarts = addrPointer; 70 | continue; 71 | } 72 | 73 | // 5. Decode the hex segment. 74 | int segEnd = Math.min(i + 4, end); 75 | int segVal = 0; 76 | for (; i < segEnd; i++) { 77 | char c = ip.charAt(i); 78 | if (c == ':') { 79 | break; 80 | } 81 | int hex = toHex(c); 82 | if (hex == -1) { 83 | return null; 84 | } 85 | segVal = segVal * 16 | hex; 86 | } 87 | addr[addrPointer++] = segVal; 88 | 89 | // 6. Ensure that the ip address 90 | // doesn't end in a colon. 91 | if (end == i) { 92 | break; 93 | } else if (ip.charAt(i) == ':') { 94 | // Don't allow trailing colon. 95 | if (i == end - 1) { 96 | return null; 97 | } 98 | } else { 99 | return null; 100 | } 101 | } 102 | 103 | // 7. Insert the compressed zeroes. 104 | if (compressionStarts == -1 && addrPointer < ADDRLEN - 1) { 105 | return null; 106 | } else if (compressionStarts > -1) { 107 | if (addrPointer == ADDRLEN) { 108 | return null; 109 | } 110 | for (int i = 1; i <= addrPointer - compressionStarts; i++) { 111 | addr[ADDRLEN - i] = addr[addrPointer - i]; 112 | addr[addrPointer - i] = 0; 113 | } 114 | } 115 | 116 | return fromAddress(addr); 117 | } 118 | 119 | private static Ip6 fromAddress(int[] addr) { 120 | // 1. Find where to compress. Prefer compressing on right side, 121 | // and only compress if more than one segment can be eliminated. 122 | byte[] zeroesStartingHere = { 123 | 0, 0, 0, 0, 0, 0, 0, (byte) (addr[7] == 0 ? 1 : 0) 124 | }; 125 | int startCompress = ADDRLEN; 126 | byte numCompress = 0; 127 | for (int i = addr.length - 2; i >= 0; i--) { 128 | if (addr[i] == 0) { 129 | zeroesStartingHere[i] = (byte) (zeroesStartingHere[i + 1] + 1); 130 | if (zeroesStartingHere[i] > 1 && zeroesStartingHere[i] >= numCompress) { 131 | numCompress = zeroesStartingHere[i]; 132 | startCompress = i; 133 | } 134 | } 135 | } 136 | 137 | int endCompress = startCompress == ADDRLEN 138 | ? ADDRLEN 139 | : startCompress + zeroesStartingHere[startCompress]; 140 | 141 | StringBuilder sb = new StringBuilder(); 142 | if (startCompress == 0) { 143 | sb.append(':'); 144 | } 145 | for (int i = 0; i < ADDRLEN; i++) { 146 | if (i == startCompress) { 147 | sb.append(':'); 148 | continue; 149 | } else if (i > startCompress && i < endCompress) { 150 | continue; 151 | } 152 | sb.append(Integer.toHexString(addr[i])); 153 | if (i < addr.length - 1) { 154 | sb.append(':'); 155 | } 156 | } 157 | String noBrackets = sb.toString(); 158 | return new AutoValue_Ip6('[' + noBrackets + ']', noBrackets); 159 | } 160 | 161 | private static int toHex(char c) { 162 | int hex; 163 | if ('0' <= c && c <= '9') { 164 | hex = c - '0'; 165 | } else if ('a' <= c && c <= 'f') { 166 | hex = 10 + c - 'a'; 167 | } else if ('A' <= c && c <= 'F') { 168 | hex = 10 + c - 'A'; 169 | } else { 170 | hex = -1; 171 | } 172 | return hex; 173 | } 174 | 175 | static boolean isIpv6(String hostname) { 176 | for (int i = 0; i < hostname.length(); i++) { 177 | switch (hostname.charAt(i)) { 178 | case '[': 179 | case ':': 180 | return true; 181 | case '.': 182 | return false; 183 | } 184 | } 185 | return false; 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/Paths.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import java.util.Collections; 5 | import java.util.Iterator; 6 | import java.util.LinkedList; 7 | import java.util.List; 8 | import java.util.regex.Pattern; 9 | import javax.annotation.Nonnull; 10 | import org.urllib.Path; 11 | 12 | public class Paths { 13 | 14 | /** 15 | * Creates a {@link org.urllib.Path} by joining {@code segments}. 16 | * 17 | *
    18 | *
  • Backslashes (\) are converted to forward slashes (/)
  • 19 | *
  • Segments are then split at the slash character.
  • 20 | *
  • Empty path components are removed.
  • 21 | *
22 | * 23 | *

To create a path with a trailing slash, either terminate the last segment with a slash, 24 | * or include empty segment as the last segment: 25 | * 26 | *

{@code
 27 |    *   // Both generate `/path/`:
 28 |    *   Path.of("path/");
 29 |    *   Path.of("path", "");
 30 |    * }
31 | * 32 | *

The result will always be an absolute path, regardless of whether the first character 33 | * is a slash or not. 34 | * 35 | *

{@code
 36 |    *   // All generate `/path`:
 37 |    *   Path.of("path");
 38 |    *   Path.of("/path");
 39 |    *   Path.of("./path");
 40 |    *   Path.of("../path");
 41 |    * }
42 | */ 43 | public static Path of(String... segments) { 44 | if (segments.length == 0) { 45 | return ImmutablePath.EMPTY; 46 | } 47 | 48 | PathBuilder pathBuilder = new PathBuilder(); 49 | for (String segment : segments) { 50 | pathBuilder.splitAndAdd(segment, false); 51 | } 52 | 53 | return ImmutablePath.create(pathBuilder); 54 | } 55 | 56 | public static Path parse(String path) { 57 | return path.isEmpty() 58 | ? ImmutablePath.EMPTY 59 | : ImmutablePath.create(new PathBuilder().splitAndAdd(path, true)); 60 | } 61 | 62 | public static Path empty() { 63 | return ImmutablePath.EMPTY; 64 | } 65 | 66 | static class PathBuilder { 67 | 68 | private static final CodepointMatcher SLASH_MATCHER = CodepointMatcher.anyOf("/\\"); 69 | private static final Pattern SINGLE_DOT = Pattern.compile("^\\.|%2[eE]$"); 70 | private static final Pattern DOUBLE_DOT = Pattern.compile("^(\\.|%2[eE]){2}$"); 71 | 72 | private final LinkedList segments; 73 | private boolean isDir; 74 | 75 | PathBuilder() { 76 | this(new LinkedList(), false); 77 | } 78 | 79 | PathBuilder(LinkedList segments, boolean isDir) { 80 | this.segments = segments; 81 | this.isDir = isDir; 82 | } 83 | 84 | PathBuilder splitAndAdd(String segment, boolean decode) { 85 | if (SLASH_MATCHER.matchesAnyOf(segment)) { 86 | int i = 0; 87 | int j = 0; 88 | for (; j < segment.length(); j++) { 89 | if (SLASH_MATCHER.matches(segment.charAt(j))) { 90 | add(segment.substring(i, j), decode); 91 | i = j + 1; 92 | } 93 | } 94 | return add(segment.substring(i, j), decode); 95 | } else { 96 | return add(segment, decode); 97 | } 98 | } 99 | 100 | private PathBuilder add(String segment, boolean decode) { 101 | if (segment.isEmpty()) { 102 | isDir = true; 103 | } else if (SINGLE_DOT.matcher(segment).matches()) { 104 | isDir = true; 105 | } else if (DOUBLE_DOT.matcher(segment).matches()) { 106 | if (!segments.isEmpty()) { 107 | segments.removeLast(); 108 | } 109 | isDir = true; 110 | } else { 111 | segments.add(decode ? PercentDecoder.decodeAll(segment) : segment); 112 | isDir = false; 113 | } 114 | return this; 115 | } 116 | } 117 | 118 | @AutoValue 119 | abstract static class ImmutablePath implements Path { 120 | 121 | private static final Path EMPTY = create(Collections.emptyList(), true); 122 | 123 | static Path create(PathBuilder builder) { 124 | return create(builder.segments, builder.isDir); 125 | } 126 | 127 | static Path create(List segments, boolean isDir) { 128 | String filename = isDir ? "" : segments.get(segments.size() - 1); 129 | return new AutoValue_Paths_ImmutablePath(segments.isEmpty(), segments, isDir, 130 | filename, encode(isDir, segments)); 131 | } 132 | 133 | private static String encode(boolean isDir, List segments) { 134 | StringBuilder sb = new StringBuilder("/"); 135 | for (Iterator iterator = segments.iterator(); iterator.hasNext(); ) { 136 | String segment = iterator.next(); 137 | sb.append(PercentEncoder.encodePathSegment(segment)); 138 | if (iterator.hasNext() || isDir) { 139 | sb.append('/'); 140 | } 141 | } 142 | return sb.toString(); 143 | } 144 | 145 | @Nonnull @Override public Path resolve(String reference) { 146 | 147 | if (reference.isEmpty()) { 148 | return this; 149 | } 150 | 151 | loop: 152 | for (int i = 0; i < reference.length(); i++) { 153 | char c = reference.charAt(i); 154 | switch (c) { 155 | case ':': 156 | throw new IllegalArgumentException( 157 | "Paths.resolve can only be used with a relative or absolute path, not a full URL."); 158 | case '/': 159 | case '\\': 160 | if (i == 0) { 161 | return parse(reference); 162 | } else { 163 | break loop; 164 | } 165 | } 166 | } 167 | 168 | LinkedList segments = new LinkedList<>(segments()); 169 | if (!isDirectory()) { 170 | segments.removeLast(); 171 | } 172 | return create(new PathBuilder(segments, true).splitAndAdd(reference, true)); 173 | } 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/SplitUrlTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import static org.hamcrest.CoreMatchers.containsString; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertThat; 6 | 7 | import org.junit.Test; 8 | 9 | public class SplitUrlTest { 10 | 11 | @Test public void empty() { 12 | SplitUrl expected = SplitUrl.builder() 13 | .path("") 14 | .urlType(Type.PATH_RELATIVE) 15 | .build(); 16 | assertEquals(expected, SplitUrl.split("")); 17 | } 18 | 19 | @Test public void length1() { 20 | assertEquals(onlyPath("/"), SplitUrl.split("/")); 21 | assertEquals(onlyPath("p"), SplitUrl.split("p")); 22 | assertEquals(onlyPath(":"), SplitUrl.split(":")); 23 | assertEquals(onlyQuery(""), SplitUrl.split("?")); 24 | assertEquals(onlyFragment(""), SplitUrl.split("#")); 25 | } 26 | 27 | 28 | @Test public void withScheme() { 29 | SplitUrl expected = SplitUrl.builder() 30 | .scheme("http") 31 | .authority("host") 32 | .urlType(Type.FULL) 33 | .build(); 34 | assertEquals(expected, SplitUrl.split("http:host")); 35 | assertEquals(expected, SplitUrl.split("HTTP:host")); 36 | assertEquals(expected, SplitUrl.split("http:/host")); 37 | assertEquals(expected, SplitUrl.split("http://host")); 38 | assertEquals(expected, SplitUrl.split("http:\\host")); 39 | assertEquals(expected, SplitUrl.split("http:\\\\host")); 40 | assertEquals(expected, SplitUrl.split("http://\\host")); 41 | } 42 | 43 | @Test public void protocolRelative() { 44 | SplitUrl expected = SplitUrl.builder() 45 | .authority("host") 46 | .urlType(Type.PROTOCOL_RELATIVE) 47 | .build(); 48 | assertEquals(expected, SplitUrl.split("//host")); 49 | assertEquals(expected, SplitUrl.split("///host")); 50 | assertEquals(expected, SplitUrl.split("\\\\host")); 51 | } 52 | 53 | @Test public void invalidOrMissingScheme() { 54 | assertEquals(onlyPath(":host"), SplitUrl.split(":host")); 55 | assertEquals(onlyPath("bad@scheme://host"), SplitUrl.split("bad@scheme://host")); 56 | assertEquals(onlyPath("://host"), SplitUrl.split("://host")); 57 | } 58 | 59 | @Test public void convertSchemeToLowerCase() { 60 | SplitUrl expected = SplitUrl.builder() 61 | .scheme("http") 62 | .authority("host") 63 | .urlType(Type.FULL) 64 | .build(); 65 | assertEquals(expected, SplitUrl.split("http://host")); 66 | assertEquals(expected, SplitUrl.split("HttP://host")); 67 | assertEquals(expected, SplitUrl.split("HTTP://host")); 68 | } 69 | 70 | @Test public void httpRequestLine() { 71 | assertEquals(SplitUrl.builder() 72 | .path("/pages/index.html") 73 | .urlType(Type.PATH_ABSOLUTE) 74 | .build(), SplitUrl.split("/pages/index.html")); 75 | 76 | assertEquals(SplitUrl.builder() 77 | .path("/pages/index.html") 78 | .query("user=dan") 79 | .urlType(Type.PATH_ABSOLUTE) 80 | .build(), SplitUrl.split("/pages/index.html?user=dan")); 81 | } 82 | 83 | @Test public void authorityWithUserInfo() { 84 | assertEquals(SplitUrl.builder() 85 | .scheme("http") 86 | .authority("user:password@domain.com:90") 87 | .path("/path") 88 | .urlType(Type.FULL) 89 | .build(), SplitUrl.split("http://user:password@domain.com:90/path")); 90 | } 91 | 92 | @Test public void wrongDirectionSlashes() { 93 | assertEquals(SplitUrl.builder() 94 | .scheme("http") 95 | .authority("host") 96 | .path("\\path") 97 | .urlType(Type.FULL) 98 | .build(), SplitUrl.split("http:\\\\host\\path")); 99 | } 100 | 101 | @Test public void unicodeInUrl() { 102 | assertEquals(SplitUrl.builder() 103 | .scheme("http") 104 | .authority("猫.cn") 105 | .path("/餐饮") 106 | .query("q=美味的食物") 107 | .fragment("猫") 108 | .urlType(Type.FULL) 109 | .build(), SplitUrl.split("http://猫.cn/餐饮?q=美味的食物#猫")); 110 | } 111 | 112 | @Test public void onlyScheme() { 113 | try { 114 | SplitUrl.split("http:"); 115 | } catch (IllegalArgumentException expected) { 116 | assertThat(expected.getMessage(), containsString("missing host")); 117 | } 118 | } 119 | 120 | @Test public void onlyPath() { 121 | assertEquals(onlyPath(""), SplitUrl.split("")); 122 | assertEquals(onlyPath("/"), SplitUrl.split("/")); 123 | assertEquals(onlyPath("/"), SplitUrl.split("/")); 124 | assertEquals(onlyPath("a"), SplitUrl.split("a")); 125 | assertEquals(onlyPath("/a"), SplitUrl.split("/a")); 126 | assertEquals(onlyPath("/a/"), SplitUrl.split("/a/")); 127 | } 128 | 129 | @Test public void onlyQuery() { 130 | assertEquals(onlyQuery(""), SplitUrl.split("?")); 131 | assertEquals(onlyQuery("?"), SplitUrl.split("??")); 132 | assertEquals(onlyQuery("/path"), SplitUrl.split("?/path")); 133 | assertEquals(onlyQuery("http://url"), SplitUrl.split("?http://url")); 134 | } 135 | 136 | @Test public void onlyFragment() { 137 | assertEquals(onlyFragment(""), SplitUrl.split("#")); 138 | assertEquals(onlyFragment("/path"), SplitUrl.split("#/path")); 139 | assertEquals(onlyFragment("?query"), SplitUrl.split("#?query")); 140 | } 141 | 142 | private SplitUrl onlyFragment(String fragment) { 143 | return SplitUrl.builder() 144 | .urlType(Type.FRAGMENT) 145 | .fragment(fragment) 146 | .build(); 147 | } 148 | 149 | private SplitUrl onlyQuery(String query) { 150 | return SplitUrl.builder() 151 | .urlType(Type.PATH_RELATIVE) 152 | .query(query) 153 | .build(); 154 | } 155 | 156 | private SplitUrl onlyPath(String path) { 157 | boolean absolute = path.startsWith("/") || path.startsWith("\\"); 158 | return SplitUrl.builder() 159 | .urlType(absolute ? Type.PATH_ABSOLUTE : Type.PATH_RELATIVE) 160 | .path(path) 161 | .build(); 162 | } 163 | } -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/PathsTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertFalse; 5 | import static org.junit.Assert.assertNotEquals; 6 | import static org.junit.Assert.assertTrue; 7 | 8 | import java.util.Arrays; 9 | import org.junit.Test; 10 | import org.urllib.Path; 11 | 12 | public class PathsTest { 13 | 14 | @Test public void resolve() { 15 | assertEquals(Paths.parse("/home/file.pdf"), 16 | Paths.parse("/home/dir").resolve("file.pdf")); 17 | 18 | assertEquals(Paths.parse("/home/dir/file.pdf"), 19 | Paths.parse("/home/dir/").resolve("file.pdf")); 20 | } 21 | 22 | @Test public void resolve_rfc3986() { 23 | Path base = Paths.parse("/b/c/d;p"); 24 | assertEquals(Paths.parse("/b/c/g"), base.resolve("g")); 25 | assertEquals(Paths.parse("/b/c/g"), base.resolve("./g")); 26 | assertEquals(Paths.parse("/b/c/g/"), base.resolve("g/")); 27 | assertEquals(Paths.parse("/g"), base.resolve("/g")); 28 | assertEquals(Paths.parse("/b/c/d;p"), base.resolve("")); 29 | assertEquals(Paths.parse("/b/c/"), base.resolve(".")); 30 | assertEquals(Paths.parse("/b/c/"), base.resolve("./")); 31 | assertEquals(Paths.parse("/b/"), base.resolve("..")); 32 | assertEquals(Paths.parse("/b/"), base.resolve("../")); 33 | assertEquals(Paths.parse("/b/g"), base.resolve("../g")); 34 | assertEquals(Paths.parse("/"), base.resolve("../..")); 35 | assertEquals(Paths.parse("/"), base.resolve("../../")); 36 | assertEquals(Paths.parse("/"), base.resolve("../../..")); 37 | assertEquals(Paths.parse("/g"), base.resolve("../../../g")); 38 | assertEquals(Paths.parse("/g"), base.resolve("/./g")); 39 | assertEquals(Paths.parse("/g"), base.resolve("/../g")); 40 | assertEquals(Paths.parse("/b/c/g."), base.resolve("g.")); 41 | assertEquals(Paths.parse("/b/c/.g"), base.resolve(".g")); 42 | assertEquals(Paths.parse("/b/c/..g"), base.resolve("..g")); 43 | assertEquals(Paths.parse("/b/c/g.."), base.resolve("g..")); 44 | assertEquals(Paths.parse("/b/g"), base.resolve("./../g")); 45 | assertEquals(Paths.parse("/b/c/g/"), base.resolve("./g/.")); 46 | assertEquals(Paths.parse("/b/c/g/h"), base.resolve("g/./h")); 47 | assertEquals(Paths.parse("/b/c/h"), base.resolve("g/../h")); 48 | assertEquals(Paths.parse("/b/c/g;x=1/y"), base.resolve("g;x=1/./y")); 49 | assertEquals(Paths.parse("/b/c/y"), base.resolve("g;x=1/../y")); 50 | } 51 | 52 | @Test public void isEmpty() { 53 | assertTrue(Paths.empty().isEmpty()); 54 | assertTrue(Paths.of().isEmpty()); 55 | assertTrue(Paths.of("").isEmpty()); 56 | assertTrue(Paths.of("/").isEmpty()); 57 | assertTrue(Paths.of("", "").isEmpty()); 58 | 59 | assertEquals(Paths.empty(), Paths.of()); 60 | assertEquals(Paths.empty(), Paths.of("")); 61 | assertEquals(Paths.empty(), Paths.of("/")); 62 | assertEquals(Paths.empty(), Paths.of("", "")); 63 | } 64 | 65 | @Test public void pathIsAlwaysAbsolute() { 66 | Path expected = Paths.of("/a"); 67 | assertEquals(expected, Paths.of("a")); 68 | assertEquals(expected, Paths.of("./a")); 69 | assertEquals(expected, Paths.of("../a")); 70 | } 71 | 72 | @Test public void emptySegmentsAreRemoved() { 73 | assertEquals(Paths.of("/a/b/c/"), Paths.of("a/", "/b", "//c//")); 74 | } 75 | 76 | @Test public void isDir() { 77 | assertTrue(Paths.of("/a", "").isDirectory()); 78 | assertTrue(Paths.of("/a/").isDirectory()); 79 | 80 | assertFalse(Paths.of("/a").isDirectory()); 81 | assertFalse(Paths.of("a").isDirectory()); 82 | } 83 | 84 | @Test public void cleanIncorrectSlashes() { 85 | assertEquals(Paths.of("/"), Paths.of("\\")); 86 | assertEquals(Paths.of("/path"), Paths.of("\\path")); 87 | assertEquals(Paths.of("/path/"), Paths.of("\\path\\")); 88 | } 89 | 90 | @Test public void removeDotSegments() { 91 | assertEquals(Arrays.asList(), Paths.of(".").segments()); 92 | assertEquals(Arrays.asList(), Paths.of("..").segments()); 93 | assertEquals(Arrays.asList(), Paths.of("/parent/..").segments()); 94 | assertEquals(Arrays.asList("parent"), Paths.of("/parent/.").segments()); 95 | assertEquals(Arrays.asList("parent"), Paths.of("/parent/%2e").segments()); 96 | assertEquals(Arrays.asList("parent"), Paths.of("/parent/%2e/").segments()); 97 | assertEquals(Arrays.asList("parent", "dir"), Paths.of("/parent/%2e/dir").segments()); 98 | assertEquals(Arrays.asList("dir"), Paths.of("/parent/%2e%2E/dir").segments()); 99 | } 100 | 101 | @Test public void segmentsIncludeFilename() { 102 | assertEquals(Arrays.asList("a"), Paths.of("/a").segments()); 103 | assertEquals(Arrays.asList("a"), Paths.of("/a/").segments()); 104 | assertEquals(Arrays.asList("a", "b"), Paths.of("/a/b").segments()); 105 | assertEquals(Arrays.asList("a", "b"), Paths.of("/a/b/").segments()); 106 | assertEquals(Arrays.asList("a", "b", "c"), Paths.of("/a/b/c").segments()); 107 | } 108 | 109 | @Test public void filenameIsNotEncoded() { 110 | assertEquals("résumé.html", Paths.of("/docs/résumé.html").filename()); 111 | } 112 | 113 | @Test public void filenameDefaultsToEmptyString() { 114 | assertEquals("", Paths.of("/lib/").filename()); 115 | } 116 | 117 | @Test public void equalsAndHashcode() { 118 | Path a1 = Paths.of("a", "b", "c/"); 119 | Path a2 = Paths.of("/a/", "/b/", "/c/"); 120 | Path a3 = Paths.of("/a", "b", "c", ""); 121 | 122 | assertEquals(a1, a2); 123 | assertEquals(a2, a3); 124 | assertEquals(a1, a3); 125 | 126 | assertEquals(a1.hashCode(), a2.hashCode()); 127 | assertEquals(a2.hashCode(), a3.hashCode()); 128 | assertEquals(a1.hashCode(), a3.hashCode()); 129 | 130 | Path b1 = Paths.of("a"); 131 | 132 | assertNotEquals(a1, b1); 133 | assertNotEquals(a2, b1); 134 | assertNotEquals(a3, b1); 135 | 136 | assertNotEquals(a1.hashCode(), b1.hashCode()); 137 | assertNotEquals(a2.hashCode(), b1.hashCode()); 138 | assertNotEquals(a3.hashCode(), b1.hashCode()); 139 | } 140 | 141 | } -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/authority/Ip6Test.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal.authority; 2 | 3 | import static org.hamcrest.CoreMatchers.containsString; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import org.junit.Test; 9 | 10 | public class Ip6Test { 11 | 12 | @Test public void ipv6() { 13 | assertEquals("[::]", Hosts.parse("::").name()); 14 | assertEquals("[200:100::]", Hosts.parse("[0200:0100:0:0::]").name()); 15 | assertEquals("[2::]", Hosts.parse("[2::]").name()); 16 | assertEquals("[::2]", Hosts.parse("[::2]").name()); 17 | assertEquals("[1::2]", Hosts.parse("[1::2]").name()); 18 | assertEquals("[2001:db8::ff00:42:8329]", Hosts.parse("2001:db8:0:0:0:ff00:42:8329").name()); 19 | assertEquals("[1111:2222:3333:4444:5555:6666:7777:8888]", 20 | Hosts.parse("1111:2222:3333:4444:5555:6666:7777:8888").name()); 21 | assertEquals("[1:2:3:4:5:6:7:8]", Hosts.parse("1:2:3:4:5:6:7:8").name()); 22 | } 23 | 24 | @Test public void empty() { 25 | String expected = "[::]"; 26 | assertEquals(expected, Hosts.parse("::").name()); 27 | assertEquals(expected, Hosts.parse("::0").name()); 28 | assertEquals(expected, Hosts.parse("::0:0").name()); 29 | assertEquals(expected, Hosts.parse("0:0:0:0:0:0:0:0").name()); 30 | } 31 | 32 | // From OkHttp 33 | @Test public void differentFormats() { 34 | // Multiple representations of the same address; see http://tools.ietf.org/html/rfc5952. 35 | String expected = "[2001:db8::1:0:0:1]"; 36 | assertEquals(expected, Hosts.parse("[2001:db8:0:0:1:0:0:1]").name()); 37 | assertEquals(expected, Hosts.parse("[2001:0db8:0:0:1:0:0:1]").name()); 38 | assertEquals(expected, Hosts.parse("[2001:db8::1:0:0:1]").name()); 39 | assertEquals(expected, Hosts.parse("[2001:db8::0:1:0:0:1]").name()); 40 | assertEquals(expected, Hosts.parse("[2001:0db8::1:0:0:1]").name()); 41 | assertEquals(expected, Hosts.parse("[2001:db8:0:0:1::1]").name()); 42 | assertEquals(expected, Hosts.parse("[2001:db8:0000:0:1::1]").name()); 43 | assertEquals(expected, Hosts.parse("[2001:DB8:0:0:1::1]").name()); 44 | } 45 | 46 | // From OkHttp 47 | @Test public void leadingCompression() { 48 | assertEquals("[::1]", Hosts.parse("[::0001]").name()); 49 | assertEquals("[::1]", Hosts.parse("[0000::0001]").name()); 50 | assertEquals("[::1]", Hosts.parse("[0000:0000:0000:0000:0000:0000:0000:0001]").name()); 51 | assertEquals("[::1]", Hosts.parse("[0000:0000:0000:0000:0000:0000::0001]").name()); 52 | } 53 | 54 | // From OkHttp 55 | @Test public void trailingCompression() { 56 | assertEquals("[1::]", Hosts.parse("[0001:0000::]").name()); 57 | assertEquals("[1::]", Hosts.parse("[0001::0000]").name()); 58 | assertEquals("[1::]", Hosts.parse("[0001::]").name()); 59 | assertEquals("[1::]", Hosts.parse("[1::]").name()); 60 | } 61 | 62 | // From OkHttp 63 | @Test public void tooManyDigitsInGroup() { 64 | assertInvalid("[00000:0000:0000:0000:0000:0000:0000:0001]"); 65 | assertInvalid("[::00001]"); 66 | } 67 | 68 | // From OkHttp 69 | @Test public void misplacedColons() { 70 | assertInvalid("[:0000:0000:0000:0000:0000:0000:0000:0001]"); 71 | assertInvalid("[:::0000:0000:0000:0000:0000:0000:0000:0001]"); 72 | assertInvalid("[:1]"); 73 | assertInvalid("[:::1]"); 74 | assertInvalid("[0000:0000:0000:0000:0000:0000:0001:]"); 75 | assertInvalid("[0000:0000:0000:0000:0000:0000:0000:0001:]"); 76 | assertInvalid("[0000:0000:0000:0000:0000:0000:0000:0001::]"); 77 | assertInvalid("[0000:0000:0000:0000:0000:0000:0000:0001:::]"); 78 | assertInvalid("[1:]"); 79 | assertInvalid("[1:::]"); 80 | assertInvalid("[1:::1]"); 81 | assertInvalid("[0000:0000:0000:0000::0000:0000:0000:0001]"); 82 | } 83 | 84 | // From OkHttp 85 | @Test public void tooManyGroups() { 86 | assertInvalid("[0000:0000:0000:0000:0000:0000:0000:0000:0001]"); 87 | } 88 | 89 | @Test public void tooFewGroups() { 90 | assertInvalid("[36:5361]"); 91 | } 92 | 93 | // From OkHttp 94 | @Test public void tooMuchCompression() { 95 | assertInvalid("[0000::0000:0000:0000:0000::0001]"); 96 | assertInvalid("[::0000:0000:0000:0000::0001]"); 97 | } 98 | 99 | // From OkHttp 100 | @Test public void canonicalForm() { 101 | assertEquals("[abcd:ef01:2345:6789:abcd:ef01:2345:6789]", 102 | Hosts.parse("[abcd:ef01:2345:6789:abcd:ef01:2345:6789]").name()); 103 | assertEquals("[a::b:0:0:0]", Hosts.parse("[a:0:0:0:b:0:0:0]").name()); 104 | assertEquals("[a:b:0:0:c::]", Hosts.parse("[a:b:0:0:c:0:0:0]").name()); 105 | assertEquals("[a:b::c:0:0]", Hosts.parse("[a:b:0:0:0:c:0:0]").name()); 106 | assertEquals("[a::b:0:0:0]", Hosts.parse("[a:0:0:0:b:0:0:0]").name()); 107 | assertEquals("[::a:b:0:0:0]", Hosts.parse("[0:0:0:a:b:0:0:0]").name()); 108 | assertEquals("[::a:0:0:0:b]", Hosts.parse("[0:0:0:a:0:0:0:b]").name()); 109 | assertEquals("[0:a:b:c:d:e:f:1]", Hosts.parse("[0:a:b:c:d:e:f:1]").name()); 110 | assertEquals("[a:b:c:d:e:f:1:0]", Hosts.parse("[a:b:c:d:e:f:1:0]").name()); 111 | assertEquals("[ff01::101]", Hosts.parse("[FF01:0:0:0:0:0:0:101]").name()); 112 | assertEquals("[2001:db8::1]", Hosts.parse("[2001:db8::1]").name()); 113 | assertEquals("[2001:db8::2:1]", Hosts.parse("[2001:db8:0:0:0:0:2:1]").name()); 114 | assertEquals("[2001:db8:0:1:1:1:1:1]", Hosts.parse("[2001:db8:0:1:1:1:1:1]").name()); 115 | assertEquals("[2001:db8::1:0:0:1]", Hosts.parse("[2001:db8:0:0:1:0:0:1]").name()); 116 | assertEquals("[2001:0:0:1::1]", Hosts.parse("[2001:0:0:1:0:0:0:1]").name()); 117 | assertEquals("[1::]", Hosts.parse("[1:0:0:0:0:0:0:0]").name()); 118 | assertEquals("[::1]", Hosts.parse("[0:0:0:0:0:0:0:1]").name()); 119 | assertEquals("[::]", Hosts.parse("[0:0:0:0:0:0:0:0]").name()); 120 | } 121 | 122 | private void assertInvalid(String host) { 123 | try { 124 | Hosts.parse(host); 125 | fail("Expected IllegalArgumentException for: " + host); 126 | } catch (IllegalArgumentException expected) { 127 | assertThat(expected.getMessage(), containsString("Invalid hostname")); 128 | } 129 | } 130 | } -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/internal/PercentEncoder.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | public abstract class PercentEncoder { 4 | 5 | private static final CodepointMatcher safePath = 6 | CodepointMatcher.anyOf(EncodeRules.PATH); 7 | 8 | private static final CodepointMatcher reEncodePath = 9 | CodepointMatcher.anyOf(EncodeRules.PATH + "\\/;"); 10 | 11 | private static final CodepointMatcher reEncodeQuery = 12 | CodepointMatcher.anyOf(EncodeRules.QUERY + "=?&+;"); 13 | 14 | private static final CodepointMatcher safeQuery = 15 | CodepointMatcher.anyOf(EncodeRules.QUERY); 16 | 17 | private static final CodepointMatcher safeFragment = 18 | CodepointMatcher.anyOf(EncodeRules.FRAGMENT); 19 | 20 | public static String encodePathSegment(String segment) { 21 | return PercentEncoder.encode(segment, safePath, false, false); 22 | } 23 | 24 | public static String encodeQueryComponent(String component) { 25 | return PercentEncoder.encode(component, safeQuery, true, false); 26 | } 27 | 28 | public static String encodeQueryComponentNoPlusForSpace(String component) { 29 | return PercentEncoder.encode(component, safeQuery, false, false); 30 | } 31 | 32 | public static String encodeFragment(String fragment) { 33 | return PercentEncoder.encode(fragment, safeFragment, false, false); 34 | } 35 | 36 | public static String reEncodePath(String path) { 37 | return PercentEncoder.encode(path, reEncodePath, false, true); 38 | } 39 | 40 | public static String reEncodeQuery(String query) { 41 | return PercentEncoder.encode(query, reEncodeQuery, false, true); 42 | } 43 | 44 | public static String reEncodeFragment(String fragment) { 45 | return PercentEncoder.encode(fragment, safeFragment, false, true); 46 | } 47 | 48 | private static final byte[] UPPER_HEX_DIGITS = 49 | {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; 50 | 51 | private static String encode(String src, CodepointMatcher safe, boolean spaceToPlus, 52 | boolean reEncode) { 53 | if (allSafe(src, safe)) { 54 | return src; 55 | } 56 | int p = 0; 57 | int[] codepoints = Strings.codePoints(src); 58 | int[] dest = new int[maxEncodedSize(codepoints, safe, reEncode)]; 59 | for (int i = 0, len = codepoints.length; i < len; i++) { 60 | int codepoint = codepoints[i]; 61 | if (reEncode && codepoint == '%') { 62 | if (i < len - 2 && Hex.isHex(codepoints[i + 1]) && Hex.isHex(codepoints[i + 2])) { 63 | dest[p++] = '%'; 64 | } else { 65 | dest[p++] = '%'; 66 | dest[p++] = '2'; 67 | dest[p++] = '5'; 68 | } 69 | } else if (spaceToPlus && codepoint == ' ') { 70 | dest[p++] = '+'; 71 | } else if (!safe.matches(codepoint)) { 72 | p += encodeTo(codepoint, p, dest); 73 | } else { 74 | dest[p++] = codepoint; 75 | } 76 | } 77 | return new String(dest, 0, p); 78 | } 79 | 80 | private static boolean allSafe(String src, CodepointMatcher safe) { 81 | for (int i = 0; i < src.length(); i++) { 82 | if (!safe.matches(src.charAt(i))) return false; 83 | } 84 | return true; 85 | } 86 | 87 | // From Guava's PercentEscaper. 88 | private static int encodeTo(int codepoint, int p, int[] dest) { 89 | if (codepoint <= 0x7F) { 90 | // Single byte UTF-8 characters 91 | // Start with "%--" and fill in the blanks 92 | dest[p] = '%'; 93 | dest[p + 2] = UPPER_HEX_DIGITS[codepoint & 0xF]; 94 | dest[p + 1] = UPPER_HEX_DIGITS[codepoint >>> 4]; 95 | return 3; 96 | } else if (codepoint <= 0x7ff) { 97 | // Two byte UTF-8 characters [cp >= 0x80 && cp <= 0x7ff] 98 | // Start with "%--%--" and fill in the blanks 99 | dest[p] = '%'; 100 | dest[p + 3] = '%'; 101 | dest[p + 5] = UPPER_HEX_DIGITS[codepoint & 0xF]; 102 | codepoint >>>= 4; 103 | dest[p + 4] = UPPER_HEX_DIGITS[0x8 | (codepoint & 0x3)]; 104 | codepoint >>>= 2; 105 | dest[p + 2] = UPPER_HEX_DIGITS[codepoint & 0xF]; 106 | codepoint >>>= 4; 107 | dest[p + 1] = UPPER_HEX_DIGITS[0xC | codepoint]; 108 | return 6; 109 | } else if (codepoint <= 0xffff) { 110 | // Three byte UTF-8 characters [cp >= 0x800 && cp <= 0xffff] 111 | // Start with "%E-%--%--" and fill in the blanks 112 | dest[p] = '%'; 113 | dest[p + 1] = 'E'; 114 | dest[p + 3] = '%'; 115 | dest[p + 6] = '%'; 116 | dest[p + 8] = UPPER_HEX_DIGITS[codepoint & 0xF]; 117 | codepoint >>>= 4; 118 | dest[p + 7] = UPPER_HEX_DIGITS[0x8 | (codepoint & 0x3)]; 119 | codepoint >>>= 2; 120 | dest[p + 5] = UPPER_HEX_DIGITS[codepoint & 0xF]; 121 | codepoint >>>= 4; 122 | dest[p + 4] = UPPER_HEX_DIGITS[0x8 | (codepoint & 0x3)]; 123 | codepoint >>>= 2; 124 | dest[p + 2] = UPPER_HEX_DIGITS[codepoint]; 125 | return 9; 126 | } else if (codepoint <= 0x10ffff) { 127 | // Four byte UTF-8 characters [cp >= 0xffff && cp <= 0x10ffff] 128 | // Start with "%F-%--%--%--" and fill in the blanks 129 | dest[p] = '%'; 130 | dest[p + 1] = 'F'; 131 | dest[p + 3] = '%'; 132 | dest[p + 6] = '%'; 133 | dest[p + 9] = '%'; 134 | dest[p + 11] = UPPER_HEX_DIGITS[codepoint & 0xF]; 135 | codepoint >>>= 4; 136 | dest[p + 10] = UPPER_HEX_DIGITS[0x8 | (codepoint & 0x3)]; 137 | codepoint >>>= 2; 138 | dest[p + 8] = UPPER_HEX_DIGITS[codepoint & 0xF]; 139 | codepoint >>>= 4; 140 | dest[p + 7] = UPPER_HEX_DIGITS[0x8 | (codepoint & 0x3)]; 141 | codepoint >>>= 2; 142 | dest[p + 5] = UPPER_HEX_DIGITS[codepoint & 0xF]; 143 | codepoint >>>= 4; 144 | dest[p + 4] = UPPER_HEX_DIGITS[0x8 | (codepoint & 0x3)]; 145 | codepoint >>>= 2; 146 | dest[p + 2] = UPPER_HEX_DIGITS[codepoint & 0x7]; 147 | return 12; 148 | } else { 149 | throw new IllegalArgumentException("Invalid unicode character value " + codepoint); 150 | } 151 | } 152 | 153 | private static int maxEncodedSize(int[] codepoints, CodepointMatcher safe, boolean reEncode) { 154 | int size = 0; 155 | for (int codepoint : codepoints) { 156 | if (safe.matches(codepoint)) { 157 | size++; 158 | } else if (codepoint <= 0x7F) { 159 | size += 3; 160 | } else if (codepoint <= 0x7ff) { 161 | size += 6; 162 | } else if (codepoint <= 0xffff) { 163 | size += 9; 164 | } else if (codepoint <= 0x10ffff) { 165 | size += 12; 166 | } else { 167 | throw new IllegalArgumentException("Invalid unicode character value " + codepoint); 168 | } 169 | } 170 | return size; 171 | } 172 | 173 | } 174 | -------------------------------------------------------------------------------- /urllib/src/main/java/org/urllib/Urls.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import com.google.auto.value.AutoValue; 4 | import java.net.URI; 5 | import java.net.URISyntaxException; 6 | import java.util.Locale; 7 | import javax.annotation.Nonnegative; 8 | import javax.annotation.Nonnull; 9 | import org.urllib.internal.Paths; 10 | import org.urllib.internal.PercentDecoder; 11 | import org.urllib.internal.PercentEncoder; 12 | import org.urllib.internal.Queries; 13 | import org.urllib.internal.Scheme; 14 | import org.urllib.internal.SplitUrl; 15 | import org.urllib.internal.Strings; 16 | import org.urllib.internal.Type; 17 | import org.urllib.internal.authority.Authority; 18 | 19 | /** 20 | * This class consists of {@code static} utility methods for operating 21 | * on URL-related objects. 22 | */ 23 | public final class Urls { 24 | 25 | /** 26 | * Given a valid {@code http} or {@code https} URL, performs the minimal amount 27 | * of escaping to return a {@link java.net.URI}. 28 | * 29 | *

The scheme, host, and port are checked for correctness. 30 | * The input must be {@code http} or {@code https}, have a valid port (if present), 31 | * and the domain must be a valid ASCII DNS, IDN, IPv4, or IPv6 host name. If these conditions 32 | * are not met, then an {@link IllegalArgumentException} is thrown. The following are 33 | * examples of inputs that would cause an {@link IllegalArgumentException}. 34 | * 35 | * 36 | * 37 | * 39 | * 40 | * 41 | * 42 | * 43 | * 44 | * 45 | * 46 | * 47 | * 48 | * 49 | * 50 | * 51 | * 52 | * 53 | * 54 | * 55 | * 56 | * 57 | * 58 | * 59 | * 60 | *
Invalid input 38 | * Reason
ldap://host.comScheme must be http or https
http://host.com:-1Invalid port
http://host..comInvalid ASCII DNS name
http://256.256.256.256Invalid IPv4
http://[zz::99]Invalid IPv6
61 | * 62 | *

If the scheme, host, and port are valid, then a minimal amount of sanitizing 63 | * and escaping is performed to create a {@link URI}: 64 | * 65 | *

    66 | *
  • Leading and trailing whitespace is removed: [\t\n\f\r ]*
  • 67 | *
  • Internal linebreaks are removed: [\n\r][\t\n\f\r ]*
  • 68 | *
  • Backslashes are corrected before the authority and within the path.
  • 69 | *
  • Characters not allowed by RFC 3986 are escaped.
  • 70 | *
  • Userinfo is removed.
  • 71 | *
72 | * 73 | * The following are examples of input and output: 74 | * 75 | * 76 | * 77 | * 79 | * 80 | * 81 | * 82 | * 83 | * 84 | * 85 | * 86 | * 87 | * 88 | * 89 | * 90 | * 91 | * 92 | * 93 | * 94 | * 95 | * 96 | * 97 | * 98 | * 99 | * 100 | * 101 | * 102 | * 103 | * 104 | * 105 | * 106 | * 107 | * 108 | * 109 | * 110 | * 111 | * 112 | * 113 | * 114 | * 115 | * 116 | * 117 | * 118 | * 119 | * 120 | * 121 | * 122 | * 123 | * 124 | *
Input 78 | * Output
http://❤http://xn--qei
http:\\host\path\?q=\#\http://host/path/?q=%5C#%5C
http://test.org/res?signature=a+b=&init=a ahttp://test.org/res?signature=a+b=&init=a%20a
http://host/path;/?q=;|http://host/path;/?q=;%7C
https://en.wikipedia.org/wiki/A*https://en.wikipedia.org/wiki/A*
https://en.wikipedia.org/wiki/C++https://en.wikipedia.org/wiki/C++
https://en.wikipedia.org/wiki/❄https://en.wikipedia.org/wiki/%E2%9D%84
http://host/%2ehttp://host/%2e
http://host/%zzhttp://host/%25zz
http://FA::0:ddhttp://[fa::dd]
http://user:pass@host.com:90http://host.com:90
125 | * 126 | * @throws IllegalArgumentException if the input is not an http or https URL; the domain is 127 | * not valid ASCII DNS, IDN, IPv4, or IPv6; or the port is invalid. 128 | */ 129 | @Nonnull public static URI createURI(@Nonnull String fullUrl) { 130 | String escaped = escape(fullUrl); 131 | 132 | try { 133 | return new URI(escaped); 134 | } catch (URISyntaxException e) { 135 | throw new AssertionError(e); 136 | } 137 | } 138 | 139 | @Nonnull public static String escape(@Nonnull String url) { 140 | String trim = Strings.sanitizeWhitespace(url); 141 | SplitUrl split = SplitUrl.split(trim); 142 | 143 | if (split.urlType() != Type.FULL) { 144 | throw new IllegalArgumentException( 145 | "Not a full URL: " + url); 146 | } 147 | 148 | StringBuilder sb = new StringBuilder(); 149 | 150 | if (split.scheme() != null) { 151 | String scheme = split.scheme().toLowerCase(Locale.US); 152 | if (scheme.equals("http") || scheme.equals("https")) { 153 | sb.append(scheme).append(':'); 154 | } else { 155 | throw new IllegalArgumentException( 156 | "Only http and https schemes are supported. Input: " + url); 157 | } 158 | } 159 | 160 | if (split.authority() != null) { 161 | sb.append("//").append(Authority.split(split.authority())); 162 | } 163 | 164 | if (split.path() != null) { 165 | sb.append(PercentEncoder.reEncodePath(split.path()).replace('\\', '/')); 166 | } 167 | 168 | if (split.query() != null) { 169 | sb.append('?').append(PercentEncoder.reEncodeQuery(split.query())); 170 | } 171 | 172 | if (split.fragment() != null) { 173 | sb.append('#').append(PercentEncoder.reEncodeFragment(split.fragment())); 174 | } 175 | 176 | return sb.toString(); 177 | } 178 | 179 | public static UrlBuilder http(String host) { 180 | return new UrlBuilder(Scheme.HTTP, host); 181 | } 182 | 183 | public static UrlBuilder https(String host) { 184 | return new UrlBuilder(Scheme.HTTPS, host); 185 | } 186 | 187 | @Nonnull public static Url parse(String url) { 188 | SplitUrl split = SplitUrl.split(Strings.sanitizeWhitespace(url)); 189 | if (split.urlType() != Type.FULL) { 190 | throw new IllegalArgumentException("URL must have a scheme and host. Eg: http://host.com/"); 191 | } 192 | 193 | UrlBuilder builder = new UrlBuilder(Scheme.valueOf(split.scheme()), split.authority()); 194 | 195 | if (!Strings.isNullOrEmpty(split.path())) { 196 | builder.path(Paths.parse(split.path())); 197 | } 198 | 199 | if (!Strings.isNullOrEmpty(split.query())) { 200 | builder.query(Queries.parse(split.query())); 201 | } 202 | 203 | if (!Strings.isNullOrEmpty(split.fragment())) { 204 | builder.fragment(PercentDecoder.decodeAll(split.fragment())); 205 | } 206 | 207 | return builder.create(); 208 | } 209 | 210 | private Urls() {} 211 | 212 | @AutoValue 213 | abstract static class ImmutableUrl implements Url { 214 | 215 | @Nonnegative abstract int defaultPort(); 216 | 217 | @Nonnull @Override public Url resolve(String reference) { 218 | String sanitized = Strings.sanitizeWhitespace(reference); 219 | if (sanitized.isEmpty()) { 220 | return this; 221 | } 222 | 223 | SplitUrl split = SplitUrl.split(sanitized); 224 | 225 | if (split.urlType() == Type.FULL) { 226 | return parse(reference); 227 | } else if (split.urlType() == Type.PROTOCOL_RELATIVE) { 228 | return parse(scheme() + ':' + reference); 229 | } 230 | 231 | UrlBuilder builder = new UrlBuilder(this); 232 | 233 | if (!Strings.isNullOrEmpty(split.path())) { 234 | builder.path(path().resolve(split.path())) 235 | .query(Queries.empty()) 236 | .fragment(""); 237 | } 238 | 239 | if (!Strings.isNullOrEmpty(split.query())) { 240 | builder.query(Queries.parse(split.query())) 241 | .fragment(""); 242 | } 243 | 244 | if (!Strings.isNullOrEmpty(split.fragment())) { 245 | builder.fragment(PercentDecoder.decodeAll(split.fragment())); 246 | } 247 | 248 | return builder.create(); 249 | } 250 | 251 | @Override @Nonnull public URI uri() { 252 | try { 253 | return new URI(toString()); 254 | } catch (URISyntaxException e) { 255 | // Reaching this point would mean a bug in our url encoding. 256 | throw new AssertionError( 257 | "Please file a bug at https://github.com/EricEdens/urllib/issues"); 258 | } 259 | } 260 | 261 | @Override public String toString() { 262 | StringBuilder sb = new StringBuilder() 263 | .append(scheme()) 264 | .append("://") 265 | .append(host().name()); 266 | 267 | if (port() != defaultPort()) { 268 | sb.append(':').append(port()); 269 | } 270 | 271 | sb.append(path().encoded()); 272 | 273 | if (!query().isEmpty()) { 274 | sb.append('?').append(query().encoded()); 275 | } 276 | 277 | if (!fragment().isEmpty()) { 278 | sb.append('#').append(PercentEncoder.encodeFragment(fragment())); 279 | } 280 | 281 | return sb.toString(); 282 | } 283 | 284 | public static Url create(String scheme, Host host, int port, Path path, Query query, 285 | String fragment, int defaultPort) { 286 | return new AutoValue_Urls_ImmutableUrl( 287 | scheme, host, port, path, query, fragment, defaultPort); 288 | } 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/internal/PercentEncoderTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib.internal; 2 | 3 | import static org.hamcrest.CoreMatchers.is; 4 | import static org.hamcrest.core.AnyOf.anyOf; 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertThat; 7 | 8 | import java.net.URI; 9 | import java.net.URISyntaxException; 10 | import org.junit.Test; 11 | 12 | public class PercentEncoderTest { 13 | 14 | private static final String ASCII = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008" 15 | + "\u0009\n\u000b\u000c\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" 16 | + "\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789" 17 | + ":;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f"; 18 | 19 | // Semicolons are nasty. RFC 2396 has the idea of a "path parameter" 20 | // that uses the semicolon. In queries, some servers will split 21 | // at both the ampersand *and* the semicolon. 22 | @Test public void encodeSemicolons_inPathAndQuery() { 23 | assertEquals("%3B", PercentEncoder.encodePathSegment(";")); 24 | assertEquals("%3B", PercentEncoder.encodeQueryComponent(";")); 25 | } 26 | 27 | @Test public void plusIsNotSpace_whenEncoding() { 28 | assertEquals("+", PercentEncoder.encodePathSegment("+")); 29 | assertEquals("%2B", PercentEncoder.encodeQueryComponent("+")); 30 | assertEquals("%2B", PercentEncoder.encodeQueryComponentNoPlusForSpace("+")); 31 | assertEquals("+", PercentEncoder.encodeFragment("+")); 32 | } 33 | 34 | @Test public void spaceIsPlus_onlyInQuery() { 35 | assertEquals("%20", PercentEncoder.encodePathSegment(" ")); 36 | assertEquals("+", PercentEncoder.encodeQueryComponent(" ")); 37 | assertEquals("%20", PercentEncoder.encodeQueryComponentNoPlusForSpace(" ")); 38 | assertEquals("%20", PercentEncoder.encodeFragment(" ")); 39 | } 40 | 41 | @Test public void encodedSegments_acceptedByJavaNetUri() throws URISyntaxException { 42 | String url = "http://host" 43 | + '/' + PercentEncoder.encodePathSegment(ASCII) 44 | + '?' + PercentEncoder.encodeQueryComponentNoPlusForSpace(ASCII) 45 | + '#' + PercentEncoder.encodeFragment(ASCII); 46 | URI uri = new URI(url); 47 | assertEquals('/' + ASCII, uri.getPath()); 48 | assertEquals(ASCII, uri.getQuery()); 49 | assertEquals(ASCII, uri.getFragment()); 50 | } 51 | 52 | @Test public void rfc3986_pathEncodingRequired() { 53 | assertEquals("%00", PercentEncoder.encodePathSegment("\u0000")); 54 | assertEquals("%01", PercentEncoder.encodePathSegment("\u0001")); 55 | assertEquals("%02", PercentEncoder.encodePathSegment("\u0002")); 56 | assertEquals("%03", PercentEncoder.encodePathSegment("\u0003")); 57 | assertEquals("%04", PercentEncoder.encodePathSegment("\u0004")); 58 | assertEquals("%05", PercentEncoder.encodePathSegment("\u0005")); 59 | assertEquals("%06", PercentEncoder.encodePathSegment("\u0006")); 60 | assertEquals("%07", PercentEncoder.encodePathSegment("\u0007")); 61 | assertEquals("%08", PercentEncoder.encodePathSegment("\u0008")); 62 | assertEquals("%09", PercentEncoder.encodePathSegment("\u0009")); 63 | assertEquals("%0A", PercentEncoder.encodePathSegment("\n")); 64 | assertEquals("%0B", PercentEncoder.encodePathSegment("\u000b")); 65 | assertEquals("%0C", PercentEncoder.encodePathSegment("\u000c")); 66 | assertEquals("%0D", PercentEncoder.encodePathSegment("\r")); 67 | assertEquals("%0E", PercentEncoder.encodePathSegment("\u000e")); 68 | assertEquals("%0F", PercentEncoder.encodePathSegment("\u000f")); 69 | assertEquals("%10", PercentEncoder.encodePathSegment("\u0010")); 70 | assertEquals("%11", PercentEncoder.encodePathSegment("\u0011")); 71 | assertEquals("%12", PercentEncoder.encodePathSegment("\u0012")); 72 | assertEquals("%13", PercentEncoder.encodePathSegment("\u0013")); 73 | assertEquals("%14", PercentEncoder.encodePathSegment("\u0014")); 74 | assertEquals("%15", PercentEncoder.encodePathSegment("\u0015")); 75 | assertEquals("%16", PercentEncoder.encodePathSegment("\u0016")); 76 | assertEquals("%17", PercentEncoder.encodePathSegment("\u0017")); 77 | assertEquals("%18", PercentEncoder.encodePathSegment("\u0018")); 78 | assertEquals("%19", PercentEncoder.encodePathSegment("\u0019")); 79 | assertEquals("%1A", PercentEncoder.encodePathSegment("\u001a")); 80 | assertEquals("%1B", PercentEncoder.encodePathSegment("\u001b")); 81 | assertEquals("%1C", PercentEncoder.encodePathSegment("\u001c")); 82 | assertEquals("%1D", PercentEncoder.encodePathSegment("\u001d")); 83 | assertEquals("%1E", PercentEncoder.encodePathSegment("\u001e")); 84 | assertEquals("%1F", PercentEncoder.encodePathSegment("\u001f")); 85 | assertEquals("%20", PercentEncoder.encodePathSegment("\u0020")); 86 | assertEquals("%22", PercentEncoder.encodePathSegment("\"")); 87 | assertEquals("%23", PercentEncoder.encodePathSegment("#")); 88 | assertEquals("%25", PercentEncoder.encodePathSegment("%")); 89 | assertEquals("%2F", PercentEncoder.encodePathSegment("/")); 90 | assertEquals("%3C", PercentEncoder.encodePathSegment("<")); 91 | assertEquals("%3E", PercentEncoder.encodePathSegment(">")); 92 | assertEquals("%3F", PercentEncoder.encodePathSegment("?")); 93 | assertEquals("%5B", PercentEncoder.encodePathSegment("[")); 94 | assertEquals("%5C", PercentEncoder.encodePathSegment("\\")); 95 | assertEquals("%5D", PercentEncoder.encodePathSegment("]")); 96 | assertEquals("%5E", PercentEncoder.encodePathSegment("^")); 97 | assertEquals("%60", PercentEncoder.encodePathSegment("`")); 98 | assertEquals("%7B", PercentEncoder.encodePathSegment("{")); 99 | assertEquals("%7C", PercentEncoder.encodePathSegment("|")); 100 | assertEquals("%7D", PercentEncoder.encodePathSegment("}")); 101 | assertEquals("%7F", PercentEncoder.encodePathSegment("\u007f")); 102 | } 103 | 104 | @Test public void rfc3986_pathEncodingOptional() { 105 | assertMaybeEncoded('!', PercentEncoder.encodePathSegment("!")); 106 | assertMaybeEncoded('$', PercentEncoder.encodePathSegment("$")); 107 | assertMaybeEncoded('&', PercentEncoder.encodePathSegment("&")); 108 | assertMaybeEncoded('\'', PercentEncoder.encodePathSegment("'")); 109 | assertMaybeEncoded('(', PercentEncoder.encodePathSegment("(")); 110 | assertMaybeEncoded(')', PercentEncoder.encodePathSegment(")")); 111 | assertMaybeEncoded('*', PercentEncoder.encodePathSegment("*")); 112 | assertMaybeEncoded('+', PercentEncoder.encodePathSegment("+")); 113 | assertMaybeEncoded(',', PercentEncoder.encodePathSegment(",")); 114 | assertMaybeEncoded(':', PercentEncoder.encodePathSegment(":")); 115 | assertMaybeEncoded(';', PercentEncoder.encodePathSegment(";")); 116 | assertMaybeEncoded('=', PercentEncoder.encodePathSegment("=")); 117 | assertMaybeEncoded('@', PercentEncoder.encodePathSegment("@")); 118 | } 119 | 120 | @Test public void rfc3986_pathEncodingNotRecommended() { 121 | assertEquals("-", PercentEncoder.encodePathSegment("-")); 122 | assertEquals(".", PercentEncoder.encodePathSegment(".")); 123 | assertEquals("_", PercentEncoder.encodePathSegment("_")); 124 | assertEquals("~", PercentEncoder.encodePathSegment("~")); 125 | 126 | assertEquals("0", PercentEncoder.encodePathSegment("0")); 127 | assertEquals("1", PercentEncoder.encodePathSegment("1")); 128 | assertEquals("2", PercentEncoder.encodePathSegment("2")); 129 | assertEquals("3", PercentEncoder.encodePathSegment("3")); 130 | assertEquals("4", PercentEncoder.encodePathSegment("4")); 131 | assertEquals("5", PercentEncoder.encodePathSegment("5")); 132 | assertEquals("6", PercentEncoder.encodePathSegment("6")); 133 | assertEquals("7", PercentEncoder.encodePathSegment("7")); 134 | assertEquals("8", PercentEncoder.encodePathSegment("8")); 135 | assertEquals("9", PercentEncoder.encodePathSegment("9")); 136 | assertEquals("A", PercentEncoder.encodePathSegment("A")); 137 | assertEquals("B", PercentEncoder.encodePathSegment("B")); 138 | assertEquals("C", PercentEncoder.encodePathSegment("C")); 139 | assertEquals("D", PercentEncoder.encodePathSegment("D")); 140 | assertEquals("E", PercentEncoder.encodePathSegment("E")); 141 | assertEquals("F", PercentEncoder.encodePathSegment("F")); 142 | assertEquals("G", PercentEncoder.encodePathSegment("G")); 143 | assertEquals("H", PercentEncoder.encodePathSegment("H")); 144 | assertEquals("I", PercentEncoder.encodePathSegment("I")); 145 | assertEquals("J", PercentEncoder.encodePathSegment("J")); 146 | assertEquals("K", PercentEncoder.encodePathSegment("K")); 147 | assertEquals("L", PercentEncoder.encodePathSegment("L")); 148 | assertEquals("M", PercentEncoder.encodePathSegment("M")); 149 | assertEquals("N", PercentEncoder.encodePathSegment("N")); 150 | assertEquals("O", PercentEncoder.encodePathSegment("O")); 151 | assertEquals("P", PercentEncoder.encodePathSegment("P")); 152 | assertEquals("Q", PercentEncoder.encodePathSegment("Q")); 153 | assertEquals("R", PercentEncoder.encodePathSegment("R")); 154 | assertEquals("S", PercentEncoder.encodePathSegment("S")); 155 | assertEquals("T", PercentEncoder.encodePathSegment("T")); 156 | assertEquals("U", PercentEncoder.encodePathSegment("U")); 157 | assertEquals("V", PercentEncoder.encodePathSegment("V")); 158 | assertEquals("W", PercentEncoder.encodePathSegment("W")); 159 | assertEquals("X", PercentEncoder.encodePathSegment("X")); 160 | assertEquals("Y", PercentEncoder.encodePathSegment("Y")); 161 | assertEquals("Z", PercentEncoder.encodePathSegment("Z")); 162 | assertEquals("a", PercentEncoder.encodePathSegment("a")); 163 | assertEquals("b", PercentEncoder.encodePathSegment("b")); 164 | assertEquals("c", PercentEncoder.encodePathSegment("c")); 165 | assertEquals("d", PercentEncoder.encodePathSegment("d")); 166 | assertEquals("e", PercentEncoder.encodePathSegment("e")); 167 | assertEquals("f", PercentEncoder.encodePathSegment("f")); 168 | assertEquals("g", PercentEncoder.encodePathSegment("g")); 169 | assertEquals("h", PercentEncoder.encodePathSegment("h")); 170 | assertEquals("i", PercentEncoder.encodePathSegment("i")); 171 | assertEquals("j", PercentEncoder.encodePathSegment("j")); 172 | assertEquals("k", PercentEncoder.encodePathSegment("k")); 173 | assertEquals("l", PercentEncoder.encodePathSegment("l")); 174 | assertEquals("m", PercentEncoder.encodePathSegment("m")); 175 | assertEquals("n", PercentEncoder.encodePathSegment("n")); 176 | assertEquals("o", PercentEncoder.encodePathSegment("o")); 177 | assertEquals("p", PercentEncoder.encodePathSegment("p")); 178 | assertEquals("q", PercentEncoder.encodePathSegment("q")); 179 | assertEquals("r", PercentEncoder.encodePathSegment("r")); 180 | assertEquals("s", PercentEncoder.encodePathSegment("s")); 181 | assertEquals("t", PercentEncoder.encodePathSegment("t")); 182 | assertEquals("u", PercentEncoder.encodePathSegment("u")); 183 | assertEquals("v", PercentEncoder.encodePathSegment("v")); 184 | assertEquals("w", PercentEncoder.encodePathSegment("w")); 185 | assertEquals("x", PercentEncoder.encodePathSegment("x")); 186 | assertEquals("y", PercentEncoder.encodePathSegment("y")); 187 | assertEquals("z", PercentEncoder.encodePathSegment("z")); 188 | } 189 | 190 | 191 | @Test public void rfc3986_queryEncodingRequired() { 192 | assertEquals("%00", PercentEncoder.encodeQueryComponent("\u0000")); 193 | assertEquals("%01", PercentEncoder.encodeQueryComponent("\u0001")); 194 | assertEquals("%02", PercentEncoder.encodeQueryComponent("\u0002")); 195 | assertEquals("%03", PercentEncoder.encodeQueryComponent("\u0003")); 196 | assertEquals("%04", PercentEncoder.encodeQueryComponent("\u0004")); 197 | assertEquals("%05", PercentEncoder.encodeQueryComponent("\u0005")); 198 | assertEquals("%06", PercentEncoder.encodeQueryComponent("\u0006")); 199 | assertEquals("%07", PercentEncoder.encodeQueryComponent("\u0007")); 200 | assertEquals("%08", PercentEncoder.encodeQueryComponent("\u0008")); 201 | assertEquals("%09", PercentEncoder.encodeQueryComponent("\u0009")); 202 | assertEquals("%0A", PercentEncoder.encodeQueryComponent("\n")); 203 | assertEquals("%0B", PercentEncoder.encodeQueryComponent("\u000b")); 204 | assertEquals("%0C", PercentEncoder.encodeQueryComponent("\u000c")); 205 | assertEquals("%0D", PercentEncoder.encodeQueryComponent("\r")); 206 | assertEquals("%0E", PercentEncoder.encodeQueryComponent("\u000e")); 207 | assertEquals("%0F", PercentEncoder.encodeQueryComponent("\u000f")); 208 | assertEquals("%10", PercentEncoder.encodeQueryComponent("\u0010")); 209 | assertEquals("%11", PercentEncoder.encodeQueryComponent("\u0011")); 210 | assertEquals("%12", PercentEncoder.encodeQueryComponent("\u0012")); 211 | assertEquals("%13", PercentEncoder.encodeQueryComponent("\u0013")); 212 | assertEquals("%14", PercentEncoder.encodeQueryComponent("\u0014")); 213 | assertEquals("%15", PercentEncoder.encodeQueryComponent("\u0015")); 214 | assertEquals("%16", PercentEncoder.encodeQueryComponent("\u0016")); 215 | assertEquals("%17", PercentEncoder.encodeQueryComponent("\u0017")); 216 | assertEquals("%18", PercentEncoder.encodeQueryComponent("\u0018")); 217 | assertEquals("%19", PercentEncoder.encodeQueryComponent("\u0019")); 218 | assertEquals("%1A", PercentEncoder.encodeQueryComponent("\u001a")); 219 | assertEquals("%1B", PercentEncoder.encodeQueryComponent("\u001b")); 220 | assertEquals("%1C", PercentEncoder.encodeQueryComponent("\u001c")); 221 | assertEquals("%1D", PercentEncoder.encodeQueryComponent("\u001d")); 222 | assertEquals("%1E", PercentEncoder.encodeQueryComponent("\u001e")); 223 | assertEquals("%1F", PercentEncoder.encodeQueryComponent("\u001f")); 224 | assertEquals("+", PercentEncoder.encodeQueryComponent("\u0020")); 225 | assertEquals("%22", PercentEncoder.encodeQueryComponent("\"")); 226 | assertEquals("%23", PercentEncoder.encodeQueryComponent("#")); 227 | assertEquals("%25", PercentEncoder.encodeQueryComponent("%")); 228 | assertEquals("%3C", PercentEncoder.encodeQueryComponent("<")); 229 | assertEquals("%3E", PercentEncoder.encodeQueryComponent(">")); 230 | assertEquals("%5B", PercentEncoder.encodeQueryComponent("[")); 231 | assertEquals("%5C", PercentEncoder.encodeQueryComponent("\\")); 232 | assertEquals("%5D", PercentEncoder.encodeQueryComponent("]")); 233 | assertEquals("%5E", PercentEncoder.encodeQueryComponent("^")); 234 | assertEquals("%60", PercentEncoder.encodeQueryComponent("`")); 235 | assertEquals("%7B", PercentEncoder.encodeQueryComponent("{")); 236 | assertEquals("%7C", PercentEncoder.encodeQueryComponent("|")); 237 | assertEquals("%7D", PercentEncoder.encodeQueryComponent("}")); 238 | assertEquals("%7F", PercentEncoder.encodeQueryComponent("\u007f")); 239 | } 240 | 241 | @Test public void rfc3986_queryEncodingOptional() { 242 | assertMaybeEncoded('!', PercentEncoder.encodeQueryComponent("!")); 243 | assertMaybeEncoded('$', PercentEncoder.encodeQueryComponent("$")); 244 | assertMaybeEncoded('&', PercentEncoder.encodeQueryComponent("&")); 245 | assertMaybeEncoded('\'', PercentEncoder.encodeQueryComponent("'")); 246 | assertMaybeEncoded('(', PercentEncoder.encodeQueryComponent("(")); 247 | assertMaybeEncoded(')', PercentEncoder.encodeQueryComponent(")")); 248 | assertMaybeEncoded('*', PercentEncoder.encodeQueryComponent("*")); 249 | assertMaybeEncoded('+', PercentEncoder.encodeQueryComponent("+")); 250 | assertMaybeEncoded(',', PercentEncoder.encodeQueryComponent(",")); 251 | assertMaybeEncoded('/', PercentEncoder.encodeQueryComponent("/")); 252 | assertMaybeEncoded(':', PercentEncoder.encodeQueryComponent(":")); 253 | assertMaybeEncoded(';', PercentEncoder.encodeQueryComponent(";")); 254 | assertMaybeEncoded('=', PercentEncoder.encodeQueryComponent("=")); 255 | assertMaybeEncoded('?', PercentEncoder.encodeQueryComponent("?")); 256 | assertMaybeEncoded('@', PercentEncoder.encodeQueryComponent("@")); 257 | } 258 | 259 | @Test public void rfc3986_queryEncodingNotRecommended() { 260 | assertEquals("-", PercentEncoder.encodeQueryComponent("-")); 261 | assertEquals(".", PercentEncoder.encodeQueryComponent(".")); 262 | assertEquals("_", PercentEncoder.encodeQueryComponent("_")); 263 | // This one is a quirk of form encoding where the tilde does get encoded. 264 | // Encoding an unreserved character is allowed by RFC 3986, although 265 | // not encouraged. 266 | assertMaybeEncoded('~', PercentEncoder.encodeQueryComponent("~")); 267 | 268 | assertEquals("0", PercentEncoder.encodeQueryComponent("0")); 269 | assertEquals("1", PercentEncoder.encodeQueryComponent("1")); 270 | assertEquals("2", PercentEncoder.encodeQueryComponent("2")); 271 | assertEquals("3", PercentEncoder.encodeQueryComponent("3")); 272 | assertEquals("4", PercentEncoder.encodeQueryComponent("4")); 273 | assertEquals("5", PercentEncoder.encodeQueryComponent("5")); 274 | assertEquals("6", PercentEncoder.encodeQueryComponent("6")); 275 | assertEquals("7", PercentEncoder.encodeQueryComponent("7")); 276 | assertEquals("8", PercentEncoder.encodeQueryComponent("8")); 277 | assertEquals("9", PercentEncoder.encodeQueryComponent("9")); 278 | assertEquals("A", PercentEncoder.encodeQueryComponent("A")); 279 | assertEquals("B", PercentEncoder.encodeQueryComponent("B")); 280 | assertEquals("C", PercentEncoder.encodeQueryComponent("C")); 281 | assertEquals("D", PercentEncoder.encodeQueryComponent("D")); 282 | assertEquals("E", PercentEncoder.encodeQueryComponent("E")); 283 | assertEquals("F", PercentEncoder.encodeQueryComponent("F")); 284 | assertEquals("G", PercentEncoder.encodeQueryComponent("G")); 285 | assertEquals("H", PercentEncoder.encodeQueryComponent("H")); 286 | assertEquals("I", PercentEncoder.encodeQueryComponent("I")); 287 | assertEquals("J", PercentEncoder.encodeQueryComponent("J")); 288 | assertEquals("K", PercentEncoder.encodeQueryComponent("K")); 289 | assertEquals("L", PercentEncoder.encodeQueryComponent("L")); 290 | assertEquals("M", PercentEncoder.encodeQueryComponent("M")); 291 | assertEquals("N", PercentEncoder.encodeQueryComponent("N")); 292 | assertEquals("O", PercentEncoder.encodeQueryComponent("O")); 293 | assertEquals("P", PercentEncoder.encodeQueryComponent("P")); 294 | assertEquals("Q", PercentEncoder.encodeQueryComponent("Q")); 295 | assertEquals("R", PercentEncoder.encodeQueryComponent("R")); 296 | assertEquals("S", PercentEncoder.encodeQueryComponent("S")); 297 | assertEquals("T", PercentEncoder.encodeQueryComponent("T")); 298 | assertEquals("U", PercentEncoder.encodeQueryComponent("U")); 299 | assertEquals("V", PercentEncoder.encodeQueryComponent("V")); 300 | assertEquals("W", PercentEncoder.encodeQueryComponent("W")); 301 | assertEquals("X", PercentEncoder.encodeQueryComponent("X")); 302 | assertEquals("Y", PercentEncoder.encodeQueryComponent("Y")); 303 | assertEquals("Z", PercentEncoder.encodeQueryComponent("Z")); 304 | assertEquals("a", PercentEncoder.encodeQueryComponent("a")); 305 | assertEquals("b", PercentEncoder.encodeQueryComponent("b")); 306 | assertEquals("c", PercentEncoder.encodeQueryComponent("c")); 307 | assertEquals("d", PercentEncoder.encodeQueryComponent("d")); 308 | assertEquals("e", PercentEncoder.encodeQueryComponent("e")); 309 | assertEquals("f", PercentEncoder.encodeQueryComponent("f")); 310 | assertEquals("g", PercentEncoder.encodeQueryComponent("g")); 311 | assertEquals("h", PercentEncoder.encodeQueryComponent("h")); 312 | assertEquals("i", PercentEncoder.encodeQueryComponent("i")); 313 | assertEquals("j", PercentEncoder.encodeQueryComponent("j")); 314 | assertEquals("k", PercentEncoder.encodeQueryComponent("k")); 315 | assertEquals("l", PercentEncoder.encodeQueryComponent("l")); 316 | assertEquals("m", PercentEncoder.encodeQueryComponent("m")); 317 | assertEquals("n", PercentEncoder.encodeQueryComponent("n")); 318 | assertEquals("o", PercentEncoder.encodeQueryComponent("o")); 319 | assertEquals("p", PercentEncoder.encodeQueryComponent("p")); 320 | assertEquals("q", PercentEncoder.encodeQueryComponent("q")); 321 | assertEquals("r", PercentEncoder.encodeQueryComponent("r")); 322 | assertEquals("s", PercentEncoder.encodeQueryComponent("s")); 323 | assertEquals("t", PercentEncoder.encodeQueryComponent("t")); 324 | assertEquals("u", PercentEncoder.encodeQueryComponent("u")); 325 | assertEquals("v", PercentEncoder.encodeQueryComponent("v")); 326 | assertEquals("w", PercentEncoder.encodeQueryComponent("w")); 327 | assertEquals("x", PercentEncoder.encodeQueryComponent("x")); 328 | assertEquals("y", PercentEncoder.encodeQueryComponent("y")); 329 | assertEquals("z", PercentEncoder.encodeQueryComponent("z")); 330 | } 331 | 332 | @Test public void rfc3986_fragmentEncodingRequired() { 333 | assertEquals("%00", PercentEncoder.encodeFragment("\u0000")); 334 | assertEquals("%01", PercentEncoder.encodeFragment("\u0001")); 335 | assertEquals("%02", PercentEncoder.encodeFragment("\u0002")); 336 | assertEquals("%03", PercentEncoder.encodeFragment("\u0003")); 337 | assertEquals("%04", PercentEncoder.encodeFragment("\u0004")); 338 | assertEquals("%05", PercentEncoder.encodeFragment("\u0005")); 339 | assertEquals("%06", PercentEncoder.encodeFragment("\u0006")); 340 | assertEquals("%07", PercentEncoder.encodeFragment("\u0007")); 341 | assertEquals("%08", PercentEncoder.encodeFragment("\u0008")); 342 | assertEquals("%09", PercentEncoder.encodeFragment("\u0009")); 343 | assertEquals("%0A", PercentEncoder.encodeFragment("\n")); 344 | assertEquals("%0B", PercentEncoder.encodeFragment("\u000b")); 345 | assertEquals("%0C", PercentEncoder.encodeFragment("\u000c")); 346 | assertEquals("%0D", PercentEncoder.encodeFragment("\r")); 347 | assertEquals("%0E", PercentEncoder.encodeFragment("\u000e")); 348 | assertEquals("%0F", PercentEncoder.encodeFragment("\u000f")); 349 | assertEquals("%10", PercentEncoder.encodeFragment("\u0010")); 350 | assertEquals("%11", PercentEncoder.encodeFragment("\u0011")); 351 | assertEquals("%12", PercentEncoder.encodeFragment("\u0012")); 352 | assertEquals("%13", PercentEncoder.encodeFragment("\u0013")); 353 | assertEquals("%14", PercentEncoder.encodeFragment("\u0014")); 354 | assertEquals("%15", PercentEncoder.encodeFragment("\u0015")); 355 | assertEquals("%16", PercentEncoder.encodeFragment("\u0016")); 356 | assertEquals("%17", PercentEncoder.encodeFragment("\u0017")); 357 | assertEquals("%18", PercentEncoder.encodeFragment("\u0018")); 358 | assertEquals("%19", PercentEncoder.encodeFragment("\u0019")); 359 | assertEquals("%1A", PercentEncoder.encodeFragment("\u001a")); 360 | assertEquals("%1B", PercentEncoder.encodeFragment("\u001b")); 361 | assertEquals("%1C", PercentEncoder.encodeFragment("\u001c")); 362 | assertEquals("%1D", PercentEncoder.encodeFragment("\u001d")); 363 | assertEquals("%1E", PercentEncoder.encodeFragment("\u001e")); 364 | assertEquals("%1F", PercentEncoder.encodeFragment("\u001f")); 365 | assertEquals("%20", PercentEncoder.encodeFragment("\u0020")); 366 | assertEquals("%22", PercentEncoder.encodeFragment("\"")); 367 | assertEquals("%23", PercentEncoder.encodeFragment("#")); 368 | assertEquals("%25", PercentEncoder.encodeFragment("%")); 369 | assertEquals("%3C", PercentEncoder.encodeFragment("<")); 370 | assertEquals("%3E", PercentEncoder.encodeFragment(">")); 371 | assertEquals("%5B", PercentEncoder.encodeFragment("[")); 372 | assertEquals("%5C", PercentEncoder.encodeFragment("\\")); 373 | assertEquals("%5D", PercentEncoder.encodeFragment("]")); 374 | assertEquals("%5E", PercentEncoder.encodeFragment("^")); 375 | assertEquals("%60", PercentEncoder.encodeFragment("`")); 376 | assertEquals("%7B", PercentEncoder.encodeFragment("{")); 377 | assertEquals("%7C", PercentEncoder.encodeFragment("|")); 378 | assertEquals("%7D", PercentEncoder.encodeFragment("}")); 379 | assertEquals("%7F", PercentEncoder.encodeFragment("\u007f")); 380 | } 381 | 382 | @Test public void rfc3986_fragmentEncodingOptional() { 383 | assertMaybeEncoded('!', PercentEncoder.encodeFragment("!")); 384 | assertMaybeEncoded('$', PercentEncoder.encodeFragment("$")); 385 | assertMaybeEncoded('&', PercentEncoder.encodeFragment("&")); 386 | assertMaybeEncoded('\'', PercentEncoder.encodeFragment("'")); 387 | assertMaybeEncoded('(', PercentEncoder.encodeFragment("(")); 388 | assertMaybeEncoded(')', PercentEncoder.encodeFragment(")")); 389 | assertMaybeEncoded('*', PercentEncoder.encodeFragment("*")); 390 | assertMaybeEncoded('+', PercentEncoder.encodeFragment("+")); 391 | assertMaybeEncoded(',', PercentEncoder.encodeFragment(",")); 392 | assertMaybeEncoded('/', PercentEncoder.encodeFragment("/")); 393 | assertMaybeEncoded('0', PercentEncoder.encodeFragment("0")); 394 | assertMaybeEncoded('1', PercentEncoder.encodeFragment("1")); 395 | assertMaybeEncoded('2', PercentEncoder.encodeFragment("2")); 396 | assertMaybeEncoded('3', PercentEncoder.encodeFragment("3")); 397 | assertMaybeEncoded('4', PercentEncoder.encodeFragment("4")); 398 | assertMaybeEncoded('5', PercentEncoder.encodeFragment("5")); 399 | assertMaybeEncoded('6', PercentEncoder.encodeFragment("6")); 400 | assertMaybeEncoded('7', PercentEncoder.encodeFragment("7")); 401 | assertMaybeEncoded('8', PercentEncoder.encodeFragment("8")); 402 | assertMaybeEncoded('9', PercentEncoder.encodeFragment("9")); 403 | assertMaybeEncoded(':', PercentEncoder.encodeFragment(":")); 404 | assertMaybeEncoded(';', PercentEncoder.encodeFragment(";")); 405 | assertMaybeEncoded('=', PercentEncoder.encodeFragment("=")); 406 | assertMaybeEncoded('?', PercentEncoder.encodeFragment("?")); 407 | assertMaybeEncoded('@', PercentEncoder.encodeFragment("@")); 408 | } 409 | 410 | @Test public void rfc3986_fragmentEncodingNotRecommended() { 411 | assertEquals("-", PercentEncoder.encodeFragment("-")); 412 | assertEquals(".", PercentEncoder.encodeFragment(".")); 413 | assertEquals("_", PercentEncoder.encodeFragment("_")); 414 | assertEquals("~", PercentEncoder.encodeFragment("~")); 415 | 416 | assertEquals("0", PercentEncoder.encodeFragment("0")); 417 | assertEquals("1", PercentEncoder.encodeFragment("1")); 418 | assertEquals("2", PercentEncoder.encodeFragment("2")); 419 | assertEquals("3", PercentEncoder.encodeFragment("3")); 420 | assertEquals("4", PercentEncoder.encodeFragment("4")); 421 | assertEquals("5", PercentEncoder.encodeFragment("5")); 422 | assertEquals("6", PercentEncoder.encodeFragment("6")); 423 | assertEquals("7", PercentEncoder.encodeFragment("7")); 424 | assertEquals("8", PercentEncoder.encodeFragment("8")); 425 | assertEquals("9", PercentEncoder.encodeFragment("9")); 426 | assertEquals("A", PercentEncoder.encodeFragment("A")); 427 | assertEquals("B", PercentEncoder.encodeFragment("B")); 428 | assertEquals("C", PercentEncoder.encodeFragment("C")); 429 | assertEquals("D", PercentEncoder.encodeFragment("D")); 430 | assertEquals("E", PercentEncoder.encodeFragment("E")); 431 | assertEquals("F", PercentEncoder.encodeFragment("F")); 432 | assertEquals("G", PercentEncoder.encodeFragment("G")); 433 | assertEquals("H", PercentEncoder.encodeFragment("H")); 434 | assertEquals("I", PercentEncoder.encodeFragment("I")); 435 | assertEquals("J", PercentEncoder.encodeFragment("J")); 436 | assertEquals("K", PercentEncoder.encodeFragment("K")); 437 | assertEquals("L", PercentEncoder.encodeFragment("L")); 438 | assertEquals("M", PercentEncoder.encodeFragment("M")); 439 | assertEquals("N", PercentEncoder.encodeFragment("N")); 440 | assertEquals("O", PercentEncoder.encodeFragment("O")); 441 | assertEquals("P", PercentEncoder.encodeFragment("P")); 442 | assertEquals("Q", PercentEncoder.encodeFragment("Q")); 443 | assertEquals("R", PercentEncoder.encodeFragment("R")); 444 | assertEquals("S", PercentEncoder.encodeFragment("S")); 445 | assertEquals("T", PercentEncoder.encodeFragment("T")); 446 | assertEquals("U", PercentEncoder.encodeFragment("U")); 447 | assertEquals("V", PercentEncoder.encodeFragment("V")); 448 | assertEquals("W", PercentEncoder.encodeFragment("W")); 449 | assertEquals("X", PercentEncoder.encodeFragment("X")); 450 | assertEquals("Y", PercentEncoder.encodeFragment("Y")); 451 | assertEquals("Z", PercentEncoder.encodeFragment("Z")); 452 | assertEquals("a", PercentEncoder.encodeFragment("a")); 453 | assertEquals("b", PercentEncoder.encodeFragment("b")); 454 | assertEquals("c", PercentEncoder.encodeFragment("c")); 455 | assertEquals("d", PercentEncoder.encodeFragment("d")); 456 | assertEquals("e", PercentEncoder.encodeFragment("e")); 457 | assertEquals("f", PercentEncoder.encodeFragment("f")); 458 | assertEquals("g", PercentEncoder.encodeFragment("g")); 459 | assertEquals("h", PercentEncoder.encodeFragment("h")); 460 | assertEquals("i", PercentEncoder.encodeFragment("i")); 461 | assertEquals("j", PercentEncoder.encodeFragment("j")); 462 | assertEquals("k", PercentEncoder.encodeFragment("k")); 463 | assertEquals("l", PercentEncoder.encodeFragment("l")); 464 | assertEquals("m", PercentEncoder.encodeFragment("m")); 465 | assertEquals("n", PercentEncoder.encodeFragment("n")); 466 | assertEquals("o", PercentEncoder.encodeFragment("o")); 467 | assertEquals("p", PercentEncoder.encodeFragment("p")); 468 | assertEquals("q", PercentEncoder.encodeFragment("q")); 469 | assertEquals("r", PercentEncoder.encodeFragment("r")); 470 | assertEquals("s", PercentEncoder.encodeFragment("s")); 471 | assertEquals("t", PercentEncoder.encodeFragment("t")); 472 | assertEquals("u", PercentEncoder.encodeFragment("u")); 473 | assertEquals("v", PercentEncoder.encodeFragment("v")); 474 | assertEquals("w", PercentEncoder.encodeFragment("w")); 475 | assertEquals("x", PercentEncoder.encodeFragment("x")); 476 | assertEquals("y", PercentEncoder.encodeFragment("y")); 477 | assertEquals("z", PercentEncoder.encodeFragment("z")); 478 | } 479 | 480 | private void assertMaybeEncoded(char codepoint, String encoded) { 481 | assertThat(encoded, anyOf(is("" + codepoint), is(encoded(codepoint)))); 482 | } 483 | 484 | private String encoded(char c) { 485 | return String.format("%%%02X", (int) c); 486 | } 487 | } -------------------------------------------------------------------------------- /urllib/src/test/java/org/urllib/UrlsTest.java: -------------------------------------------------------------------------------- 1 | package org.urllib; 2 | 3 | import static org.hamcrest.CoreMatchers.containsString; 4 | import static org.junit.Assert.assertEquals; 5 | import static org.junit.Assert.assertThat; 6 | import static org.junit.Assert.fail; 7 | 8 | import java.net.URI; 9 | import java.net.URISyntaxException; 10 | import java.util.Arrays; 11 | import org.junit.Test; 12 | import org.urllib.internal.Paths; 13 | import org.urllib.internal.Queries; 14 | import org.urllib.internal.Scheme; 15 | 16 | public class UrlsTest { 17 | 18 | @Test public void resolving() { 19 | Url base = Urls.parse("http://a/b/c/d;p?q"); 20 | assertEquals(Urls.parse("http://host"), base.resolve("http://host")); 21 | assertEquals(Urls.parse("http://a/path"), base.resolve("/path")); 22 | assertEquals(Urls.parse("http://a/b/c/path"), base.resolve("path")); 23 | assertEquals(Urls.parse("http://a/b/c/d;p?y"), base.resolve("?y")); 24 | assertEquals(Urls.parse("http://a/b/c/g?y"), base.resolve("g?y")); 25 | assertEquals(Urls.parse("http://a/b/c/d;p?q#s"), base.resolve("#s")); 26 | assertEquals(Urls.parse("http://a/b/c/g#s"), base.resolve("g#s")); 27 | assertEquals(Urls.parse("http://a/b/c/g?y#s"), base.resolve("g?y#s")); 28 | assertEquals(Urls.parse("http://a/b/c/;x"), base.resolve(";x")); 29 | assertEquals(Urls.parse("http://a/b/c/g;x"), base.resolve("g;x")); 30 | assertEquals(Urls.parse("http://a/b/c/g;x?y#s"), base.resolve("g;x?y#s")); 31 | assertEquals(Urls.parse("http://a/b/c/g?y/./x"), base.resolve("g?y/./x")); 32 | assertEquals(Urls.parse("http://a/b/c/g?y/../x"), base.resolve("g?y/../x")); 33 | assertEquals(Urls.parse("http://a/b/c/g#s/./x"), base.resolve("g#s/./x")); 34 | assertEquals(Urls.parse("http://a/b/c/g#s/../x"), base.resolve("g#s/../x")); 35 | } 36 | 37 | @Test public void emptyPathIsAlwaysForwardSlash() { 38 | Url expected = Urls.http("host").path("/").create(); 39 | assertEquals(expected, Urls.http("host").create()); 40 | assertEquals(expected, Urls.http("host").path("").create()); 41 | assertEquals(expected, Urls.http("host").path("\\").create()); 42 | } 43 | 44 | @Test public void scheme() { 45 | Url url = Urls.http("host").create(); 46 | assertEquals(Scheme.HTTP.name(), url.scheme()); 47 | 48 | url = Urls.https("host").create(); 49 | assertEquals(Scheme.HTTPS.name(), url.scheme()); 50 | } 51 | 52 | @Test public void host() throws Exception { 53 | Url url = Urls.http("host").create(); 54 | assertEquals("host", url.host().display()); 55 | 56 | url = Urls.http("10.10.0.1:9000").create(); 57 | assertEquals("10.10.0.1", url.host().display()); 58 | 59 | url = Urls.http("2001:0db8:0000:0000:0000:8a2e:0370:7334").create(); 60 | assertEquals("2001:db8::8a2e:370:7334", url.host().display()); 61 | 62 | url = Urls.http("[2001:db8::8a2e:370:7334]").create(); 63 | assertEquals("2001:db8::8a2e:370:7334", url.host().display()); 64 | 65 | url = Urls.http("[::A]:9000").create(); 66 | assertEquals("::a", url.host().display()); 67 | assertEquals(9000, url.port()); 68 | } 69 | 70 | @Test public void host_dontAllowInvalidIpv4() { 71 | assertInvalidHost("10.10.0", "Invalid hostname"); 72 | assertInvalidHost("1.1.1", "Invalid hostname"); 73 | assertInvalidHost("0xa.0xb.0xc.0xd", "Invalid hostname"); 74 | assertInvalidHost("3294823", "Invalid hostname"); 75 | assertInvalidHost("1.1.1.1.1", "Invalid hostname"); 76 | assertInvalidHost("-1:-1:-1:-1", "Invalid hostname"); 77 | } 78 | 79 | @Test public void host_dontAllowInvalidIpv6() { 80 | assertInvalidHost(":::", "Invalid hostname"); 81 | assertInvalidHost("1:2:3:4:5:6:7:8:9", "Invalid hostname"); 82 | assertInvalidHost("a::z:80", "Invalid hostname"); 83 | } 84 | 85 | @Test public void host_dontAllowInvalidDns() { 86 | assertInvalidHost("host .com", "Invalid hostname"); 87 | assertInvalidHost("host_name.com", "Invalid hostname"); 88 | } 89 | 90 | @Test public void host_removesUserInfo() { 91 | Url url = Urls.http("user:password@host.com").create(); 92 | assertEquals("host.com", url.host().display()); 93 | 94 | url = Urls.http("user@domain.com:password@host.com").create(); 95 | assertEquals("host.com", url.host().display()); 96 | } 97 | 98 | @Test public void host_convertCharactersToLowerCase() { 99 | assertEquals("abcd", Urls.http("ABCD").create().host().display()); 100 | assertEquals("σ", Urls.http("Σ").create().host().display()); 101 | } 102 | 103 | @Test public void host_idnEncodedAsPunycode() { 104 | Url url = Urls.http("bücher").create(); 105 | assertEquals("xn--bcher-kva", url.host().name()); 106 | assertEquals("bücher", url.host().display()); 107 | } 108 | 109 | @Test public void host_builderTakesPunycodeOrUnicode() { 110 | Url unicode = Urls.http("bücher").create(); 111 | Url punycode = Urls.http("xn--bcher-kva").create(); 112 | assertEquals(unicode, punycode); 113 | assertEquals(unicode.hashCode(), punycode.hashCode()); 114 | } 115 | 116 | @Test public void port() { 117 | Url url = Urls.http("host").create(); 118 | assertEquals(80, url.port()); 119 | 120 | url = Urls.http("host:443").create(); 121 | assertEquals(443, url.port()); 122 | 123 | url = Urls.http("host").port(8080).create(); 124 | assertEquals(8080, url.port()); 125 | 126 | url = Urls.https("host").create(); 127 | assertEquals(443, url.port()); 128 | 129 | url = Urls.https("host:80").create(); 130 | assertEquals(80, url.port()); 131 | } 132 | 133 | @Test public void fragment() { 134 | Url url = Urls.http("host") 135 | .fragment("\uD83D\uDC36") 136 | .create(); 137 | assertEquals("\uD83D\uDC36", url.fragment()); 138 | } 139 | 140 | @Test public void uriInteropAllCodepoints() { 141 | for (char point = 0; point < 0x100; point++) { 142 | String input = "" + point; 143 | Url url = Urls.http("host.com") 144 | .path("/" + input) 145 | .query(input, input) 146 | .fragment(input) 147 | .create(); 148 | 149 | assertEquals(url.toString(), url.uri().toString()); 150 | } 151 | } 152 | 153 | @Test public void uriInteropSpaceAndPlus() { 154 | Url url = Urls.parse("http://site.com/c++?q=%2B+-"); 155 | URI uri = url.uri(); 156 | assertEquals("/c++", uri.getPath()); 157 | assertEquals("q=+ -", uri.getQuery()); 158 | } 159 | 160 | @Test public void uriInteropUnicode() { 161 | Url url = Urls.parse("http://❄.com/❄?q=❄#❄"); 162 | URI uri = url.uri(); 163 | assertEquals("xn--tdi.com", uri.getHost()); 164 | assertEquals("/❄", uri.getPath()); 165 | assertEquals("q=❄", uri.getQuery()); 166 | assertEquals("❄", uri.getFragment()); 167 | } 168 | 169 | @Test public void uriInteropHash() { 170 | Url url = Urls.http("host.com") 171 | .path("/c#") 172 | .query("q", "#!") 173 | .fragment("#fragment#") 174 | .create(); 175 | URI uri = url.uri(); 176 | assertEquals("/c#", uri.getPath()); 177 | assertEquals("q=#!", uri.getQuery()); 178 | assertEquals("#fragment#", uri.getFragment()); 179 | } 180 | 181 | @Test public void uriInteropIpv6() { 182 | Url url = Urls.http("[ff::00]") 183 | .create(); 184 | URI uri = url.uri(); 185 | assertEquals("[ff::]", uri.getHost()); 186 | } 187 | 188 | @Test public void allowPortWithHost() { 189 | assertEquals(8080, Urls.http("localhost:8080").create().port()); 190 | assertEquals(80, Urls.https("localhost:80").create().port()); 191 | } 192 | 193 | @Test public void trimWhitespaceBeforeParsing() { 194 | assertEquals(Urls.http("example.com").create(), Urls.parse(" http://\nexample.\n com ")); 195 | } 196 | 197 | @Test public void parseRequiresSchemeAndHost() { 198 | String msg = "must have a scheme and host"; 199 | assertInvalidParse("host.com", msg); 200 | assertInvalidParse("//host.com", msg); 201 | assertInvalidParse("../path", msg); 202 | assertInvalidParse("path/info.pdf", msg); 203 | assertInvalidParse("#fragment", msg); 204 | } 205 | 206 | @Test public void parseSchemeMustBeHttpOrHttps() { 207 | String msg = "http or https"; 208 | assertInvalidParse("mysql://host.com", msg); 209 | assertInvalidParse("ws://host.com", msg); 210 | assertInvalidParse("jdbc://host.com", msg); 211 | assertInvalidParse("ldap://host.com", msg); 212 | } 213 | 214 | @Test public void parseSchemeIsCaseInsensitive() { 215 | assertEquals(Scheme.HTTP.name(), Urls.parse("HTTP://host.com").scheme()); 216 | assertEquals(Scheme.HTTPS.name(), Urls.parse("HtTPs://host.com").scheme()); 217 | } 218 | 219 | @Test public void parsePrefersPortFromInputString() { 220 | assertEquals(443, Urls.parse("https://host.com").port()); 221 | assertEquals(80, Urls.parse("http://host.com").port()); 222 | 223 | assertEquals(9000, Urls.parse("http://host.com:9000").port()); 224 | assertEquals(443, Urls.parse("http://host.com:443").port()); 225 | assertEquals(80, Urls.parse("https://host.com:80").port()); 226 | 227 | assertEquals(8080, Urls.parse("http://[a::443]:8080").port()); 228 | assertEquals(8080, Urls.parse("http://1.1.1.1:8080").port()); 229 | } 230 | 231 | @Test public void parseValidatesPort() { 232 | String msg = "Invalid port"; 233 | assertInvalidParse("http://host.com:-1", msg); 234 | assertInvalidParse("http://host.com:0x01", msg); 235 | assertInvalidParse("http://host.com:10000000", msg); 236 | } 237 | 238 | @Test public void parseValidatesHost() { 239 | assertInvalidParse("http:////", "missing host"); 240 | assertInvalidParse("http://?", "missing host"); 241 | assertInvalidParse("http://#", "missing host"); 242 | assertInvalidParse("http://host\u2000.com", "Invalid host"); 243 | assertInvalidParse("http://!@[", "Invalid host"); 244 | assertInvalidParse("http://[a:1:b]", "Invalid host"); 245 | assertInvalidParse("http://[1:2:3:4:5:6:7:8:9]", "Invalid host"); 246 | assertInvalidParse("http://192.168", "Invalid host"); 247 | assertInvalidParse("http://1.1.1.1.1", "Invalid host"); 248 | 249 | // Don't allow leading zeroes in ipv4 since it's unclear whether the encoding 250 | // is decimal or octal. 251 | assertInvalidParse("http://01.01.01.01", "Invalid host"); 252 | } 253 | 254 | @Test public void parseConvertsDnsHostToLowerCase() { 255 | assertEquals("host.com", Urls.parse("http://HOST.com").host().name()); 256 | assertEquals("host.com", Urls.parse("http://HoSt.COM").host().name()); 257 | } 258 | 259 | @Test public void parseCompressesIpv6() { 260 | assertEquals("[a::1]", Urls.parse("http://[a:0:0:0:0:0:0:1]").host().name()); 261 | assertEquals("[ff::]", Urls.parse("http://[FF::0:0]").host().name()); 262 | assertEquals("[::a:b:0:0:0]", Urls.parse("http://[0:0:0:a:b:00:000:0]").host().name()); 263 | } 264 | 265 | @Test public void parseAlwaysReturnsUrlWithPath() { 266 | Path expected = Paths.empty(); 267 | assertEquals(expected, Urls.parse("http://host.com").path()); 268 | assertEquals(expected, Urls.parse("http://host.com/").path()); 269 | assertEquals(expected, Urls.parse("http://host.com?query").path()); 270 | assertEquals(expected, Urls.parse("http://host.com#fragment").path()); 271 | } 272 | 273 | @Test public void parseRemovesDotSegmentsInPath() { 274 | assertEquals(Paths.of("a/b/"), Urls.parse("http://host.com/a/b/").path()); 275 | assertEquals(Paths.of("a/b/"), Urls.parse("http://host.com/a/b/.").path()); 276 | assertEquals(Paths.of("a/b/"), Urls.parse("http://host.com/a/b/c/..").path()); 277 | assertEquals(Paths.of("a/b/"), Urls.parse("http://host.com/a/b/c/../.").path()); 278 | assertEquals(Paths.of("a/"), Urls.parse("http://host.com/a/b/c/../..").path()); 279 | assertEquals(Paths.of("a/b/file.html"), Urls.parse("http://host.com/a/b/c/../file.html").path()); 280 | 281 | assertEquals(Paths.of("a/b/"), Urls.parse("http://host.com/a/b/%2e").path()); 282 | assertEquals(Paths.of("a/b/"), Urls.parse("http://host.com/a/b/c/%2E%2e").path()); 283 | } 284 | 285 | @Test public void parseRemovesEmptyQueryValues() { 286 | Query expected = Queries.of(Arrays.asList(Queries.create("k", ""))); 287 | assertEquals(expected, Urls.parse("http://host.com?k=").query()); 288 | assertEquals(expected, Urls.parse("http://host.com?k").query()); 289 | } 290 | 291 | @Test public void parseRetainsDuplicateKeysInQuery() { 292 | Query expected = Queries.of( 293 | Arrays.asList(Queries.create("k", "a"), Queries.create("k", "b"))); 294 | assertEquals(expected, Urls.parse("http://host.com?k=a&k=b").query()); 295 | } 296 | 297 | @Test public void parseRemovesPercentEncoding() { 298 | Url decoded = Urls.parse("http://host.com/docs/résumé.html?q=\uD83D\uDC3C#\uD83D\uDE03"); 299 | Url encoded = Urls.parse("http://host.com/docs/r%C3%A9sum%C3%A9.html?q=%F0%9F%90%BC#%F0%9F%98%83"); 300 | assertEquals(decoded, encoded); 301 | assertEquals(decoded, encoded); 302 | } 303 | 304 | @Test public void parseHandlesSlashesInBothDirections() { 305 | assertEquals(Urls.parse("http://host.com/a/b/"), Urls.parse("http:\\\\host.com\\a\\b\\")); 306 | } 307 | 308 | private void assertInvalidParse(String url, String msg) { 309 | try { 310 | Urls.parse(url); 311 | fail("Expected IllegalArgumentException for: " + url); 312 | } catch (IllegalArgumentException expected) { 313 | if (!expected.getMessage().contains(msg)) { 314 | throw expected; 315 | } 316 | } 317 | } 318 | 319 | private void assertInvalidHost(String host, String msg) { 320 | try { 321 | Urls.http(host); 322 | fail("Expected IllegalArgumentException for: " + host); 323 | } catch (IllegalArgumentException expected) { 324 | assertThat(expected.getMessage(), containsString(msg)); 325 | } 326 | } 327 | 328 | @Test public void minimalEscapeTrimsWhitespace() { 329 | String expected = "http://host"; 330 | assertEquals(expected, Urls.escape(" http://host ")); 331 | assertEquals(expected, Urls.escape(" http://host ")); 332 | assertEquals(expected, Urls.escape(" http://host\n")); 333 | assertEquals(expected, Urls.escape("\thttp://host\n")); 334 | assertEquals(expected, Urls.escape("\fhttp://host\n")); 335 | assertEquals(expected, Urls.escape("\fhttp://host\n\r")); 336 | } 337 | 338 | @Test public void createURI() { 339 | assertEquals("http://host/path/", Urls.createURI("http:\\\\host\\path\\").toString()); 340 | assertEquals("http://host/path/?q=%5C#%5C", Urls.createURI("http:\\\\host\\path\\?q=\\#\\").toString()); 341 | assertEquals("http://test.org/res?signature=a+b=&init=a%20a", Urls.createURI("http://test.org/res?signature=a+b=&init=a a").toString()); 342 | assertEquals("http://host/path;/?q=;%7C", Urls.createURI("http://host/path;/?q=;|").toString()); 343 | assertEquals("https://en.wikipedia.org/wiki/A*", Urls.createURI("https://en.wikipedia.org/wiki/A*").toString()); 344 | assertEquals("https://en.wikipedia.org/wiki/C++", Urls.createURI("https://en.wikipedia.org/wiki/C++").toString()); 345 | assertEquals("https://en.wikipedia.org/wiki/%E2%9D%84", Urls.createURI("https://en.wikipedia.org/wiki/❄").toString()); 346 | assertEquals("http://host/%2e", Urls.createURI("http://host/%2e").toString()); 347 | assertEquals("http://host/%25zz", Urls.createURI("http://host/%zz").toString()); 348 | assertEquals("http://[fa::dd]", Urls.createURI("http://FA::0:dd").toString()); 349 | assertEquals("http://host.com:90", Urls.createURI("http://user:pass@host.com:90").toString()); 350 | } 351 | 352 | @Test public void createURIHost() { 353 | String expected = "http://xn--qei"; 354 | String input = "http://❤"; 355 | assertEquals(expected, Urls.createURI(input).toString()); 356 | assertEquals("http://host.com:90", Urls.createURI("http://user:pass@host.com:90").toString()); 357 | assertEquals("http://host.com", Urls.createURI("http://HOST.com.").toString()); 358 | assertEquals("http://host.com", Urls.createURI("http://HOST.com:").toString()); 359 | assertEquals("http://host.com/", Urls.createURI("http://HOST.com:/").toString()); 360 | assertEquals("http://192.168.1.1", Urls.createURI("http://192.168.1.1").toString()); 361 | assertEquals("http://192.com", Urls.createURI("http://192.com").toString()); 362 | assertEquals("http://192.com", Urls.createURI("http://192%2ecom").toString()); 363 | assertEquals("http://[fa::dd]", Urls.createURI("http://FA::0:dd").toString()); 364 | } 365 | 366 | @Test public void createURIBackSlashes() { 367 | assertEquals("http://host/path/?q=%5C#%5C", 368 | Urls.createURI("http:\\\\host\\path\\?q=\\#\\").toString()); 369 | } 370 | 371 | @Test public void createURIPort() { 372 | assertEquals("http://host:80", Urls.createURI("http://host:80").toString()); 373 | } 374 | 375 | @Test public void createURIPath() { 376 | assertEquals("http://host/%2e", Urls.createURI("http://host/%2e").toString()); 377 | assertEquals("http://host/%25zz", Urls.createURI("http://host/%zz").toString()); 378 | } 379 | 380 | @Test public void minimalEscapeRemovesLineBreaks() { 381 | String expected = "http://host"; 382 | assertEquals(expected, Urls.escape("http://\nhost")); 383 | assertEquals(expected, Urls.escape("http://\n\rhost")); 384 | assertEquals(expected, Urls.escape("http://\rhost")); 385 | assertEquals(expected, Urls.escape("http://\r host")); 386 | assertEquals(expected, Urls.escape("http://\n\thost")); 387 | } 388 | 389 | @Test public void minimalEncodeToLowerCase() { 390 | assertEquals("http://host", Urls.escape("HTTP://host")); 391 | } 392 | 393 | @Test public void minimalEncodeHostname() { 394 | assertEquals("http://xn--qei", Urls.escape("http://❤")); 395 | assertEquals("http://host.com:9000", Urls.escape("http://user:password@host.com:9000")); 396 | assertEquals("http://host.com", Urls.escape("http://HOST.com.")); 397 | assertEquals("http://192.168.1.1", Urls.escape("http://192.168.1.1")); 398 | assertEquals("http://192.com", Urls.escape("http://192.com")); 399 | assertEquals("http://192.com", Urls.escape("http://192%2ecom")); 400 | assertEquals("http://[fa::dd]", Urls.escape("http://FA::0:dd")); 401 | } 402 | 403 | @Test public void minimalEncodeChecksAuthority() { 404 | try { 405 | Urls.escape("http://\\\\]/path"); 406 | fail("Expected IllegalArgumentException"); 407 | } catch (IllegalArgumentException expected) { 408 | assertThat(expected.getMessage(), containsString("Invalid hostname:")); 409 | } 410 | } 411 | 412 | @Test public void minimalEncodeFixColonSlashSlash() { 413 | String expected = "http://host"; 414 | assertEquals(expected, Urls.escape("http://////host")); 415 | assertEquals(expected, Urls.escape("http:/host")); 416 | assertEquals(expected, Urls.escape("http:\\host")); 417 | assertEquals(expected, Urls.escape("http:\\\\host")); 418 | } 419 | 420 | @Test public void minimalEscape_retainPlusInPath() { 421 | assertEquals("http://wikipedia.org/c++", Urls.escape("http://wikipedia.org/c++")); 422 | } 423 | 424 | @Test public void minimalEscape_retainPlusInQiuery() { 425 | assertEquals("http://wikipedia.org/?q=c++", Urls.escape("http://wikipedia.org/?q=c++")); 426 | } 427 | 428 | @Test public void minimalEncodePath() { 429 | verifyEscaping("/%00", "/\u0000"); 430 | verifyEscaping("/%01", "/\u0001"); 431 | verifyEscaping("/%02", "/\u0002"); 432 | verifyEscaping("/%03", "/\u0003"); 433 | verifyEscaping("/%04", "/\u0004"); 434 | verifyEscaping("/%05", "/\u0005"); 435 | verifyEscaping("/%06", "/\u0006"); 436 | verifyEscaping("/%07", "/\u0007"); 437 | verifyEscaping("/%08", "/\u0008"); 438 | verifyEscaping("/", "/\u0009"); 439 | verifyEscaping("/", "/\n"); 440 | verifyEscaping("/%0B", "/\u000b"); 441 | verifyEscaping("/", "/\u000c"); 442 | verifyEscaping("/", "/\r"); 443 | verifyEscaping("/%0E", "/\u000e"); 444 | verifyEscaping("/%0F", "/\u000f"); 445 | verifyEscaping("/%10", "/\u0010"); 446 | verifyEscaping("/%11", "/\u0011"); 447 | verifyEscaping("/%12", "/\u0012"); 448 | verifyEscaping("/%13", "/\u0013"); 449 | verifyEscaping("/%14", "/\u0014"); 450 | verifyEscaping("/%15", "/\u0015"); 451 | verifyEscaping("/%16", "/\u0016"); 452 | verifyEscaping("/%17", "/\u0017"); 453 | verifyEscaping("/%18", "/\u0018"); 454 | verifyEscaping("/%19", "/\u0019"); 455 | verifyEscaping("/%1A", "/\u001a"); 456 | verifyEscaping("/%1B", "/\u001b"); 457 | verifyEscaping("/%1C", "/\u001c"); 458 | verifyEscaping("/%1D", "/\u001d"); 459 | verifyEscaping("/%1E", "/\u001e"); 460 | verifyEscaping("/%1F", "/\u001f"); 461 | verifyEscaping("/", "/\u0020"); 462 | verifyEscaping("/!", "/!"); 463 | verifyEscaping("/%22", "/\""); 464 | verifyEscaping("/#", "/#"); 465 | verifyEscaping("/$", "/$"); 466 | verifyEscaping("/%25", "/%"); 467 | verifyEscaping("/&", "/&"); 468 | verifyEscaping("/'", "/'"); 469 | verifyEscaping("/(", "/("); 470 | verifyEscaping("/)", "/)"); 471 | verifyEscaping("/*", "/*"); 472 | verifyEscaping("/+", "/+"); 473 | verifyEscaping("/,", "/,"); 474 | verifyEscaping("/-", "/-"); 475 | verifyEscaping("/.", "/."); 476 | verifyEscaping("/", "/"); 477 | verifyEscaping("/0", "/0"); 478 | verifyEscaping("/1", "/1"); 479 | verifyEscaping("/2", "/2"); 480 | verifyEscaping("/3", "/3"); 481 | verifyEscaping("/4", "/4"); 482 | verifyEscaping("/5", "/5"); 483 | verifyEscaping("/6", "/6"); 484 | verifyEscaping("/7", "/7"); 485 | verifyEscaping("/8", "/8"); 486 | verifyEscaping("/9", "/9"); 487 | verifyEscaping("/:", "/:"); 488 | verifyEscaping("/;", "/;"); 489 | verifyEscaping("/%3C", "/<"); 490 | verifyEscaping("/=", "/="); 491 | verifyEscaping("/%3E", "/>"); 492 | verifyEscaping("/?", "/?"); 493 | verifyEscaping("/@", "/@"); 494 | verifyEscaping("/A", "/A"); 495 | verifyEscaping("/B", "/B"); 496 | verifyEscaping("/C", "/C"); 497 | verifyEscaping("/D", "/D"); 498 | verifyEscaping("/E", "/E"); 499 | verifyEscaping("/F", "/F"); 500 | verifyEscaping("/G", "/G"); 501 | verifyEscaping("/H", "/H"); 502 | verifyEscaping("/I", "/I"); 503 | verifyEscaping("/J", "/J"); 504 | verifyEscaping("/K", "/K"); 505 | verifyEscaping("/L", "/L"); 506 | verifyEscaping("/M", "/M"); 507 | verifyEscaping("/N", "/N"); 508 | verifyEscaping("/O", "/O"); 509 | verifyEscaping("/P", "/P"); 510 | verifyEscaping("/Q", "/Q"); 511 | verifyEscaping("/R", "/R"); 512 | verifyEscaping("/S", "/S"); 513 | verifyEscaping("/T", "/T"); 514 | verifyEscaping("/U", "/U"); 515 | verifyEscaping("/V", "/V"); 516 | verifyEscaping("/W", "/W"); 517 | verifyEscaping("/X", "/X"); 518 | verifyEscaping("/Y", "/Y"); 519 | verifyEscaping("/Z", "/Z"); 520 | verifyEscaping("/%5B", "/["); 521 | verifyEscaping("/", "\\"); 522 | verifyEscaping("/%5D", "/]"); 523 | verifyEscaping("/%5E", "/^"); 524 | verifyEscaping("/_", "/_"); 525 | verifyEscaping("/%60", "/`"); 526 | verifyEscaping("/a", "/a"); 527 | verifyEscaping("/b", "/b"); 528 | verifyEscaping("/c", "/c"); 529 | verifyEscaping("/d", "/d"); 530 | verifyEscaping("/e", "/e"); 531 | verifyEscaping("/f", "/f"); 532 | verifyEscaping("/g", "/g"); 533 | verifyEscaping("/h", "/h"); 534 | verifyEscaping("/i", "/i"); 535 | verifyEscaping("/j", "/j"); 536 | verifyEscaping("/k", "/k"); 537 | verifyEscaping("/l", "/l"); 538 | verifyEscaping("/m", "/m"); 539 | verifyEscaping("/n", "/n"); 540 | verifyEscaping("/o", "/o"); 541 | verifyEscaping("/p", "/p"); 542 | verifyEscaping("/q", "/q"); 543 | verifyEscaping("/r", "/r"); 544 | verifyEscaping("/s", "/s"); 545 | verifyEscaping("/t", "/t"); 546 | verifyEscaping("/u", "/u"); 547 | verifyEscaping("/v", "/v"); 548 | verifyEscaping("/w", "/w"); 549 | verifyEscaping("/x", "/x"); 550 | verifyEscaping("/y", "/y"); 551 | verifyEscaping("/z", "/z"); 552 | verifyEscaping("/%7B", "/{"); 553 | verifyEscaping("/%7C", "/|"); 554 | verifyEscaping("/%7D", "/}"); 555 | verifyEscaping("/~", "/~"); 556 | verifyEscaping("/%7F", "/\u007f"); 557 | verifyEscaping("/%C2%80", "/\u0080"); 558 | } 559 | 560 | @Test public void minimalEscapeQuery() { 561 | verifyEscaping("?%00=%00&%00=%00", "?\u0000=\u0000&\u0000=\u0000"); 562 | verifyEscaping("?%01=%01&%01=%01", "?\u0001=\u0001&\u0001=\u0001"); 563 | verifyEscaping("?%02=%02&%02=%02", "?\u0002=\u0002&\u0002=\u0002"); 564 | verifyEscaping("?%03=%03&%03=%03", "?\u0003=\u0003&\u0003=\u0003"); 565 | verifyEscaping("?%04=%04&%04=%04", "?\u0004=\u0004&\u0004=\u0004"); 566 | verifyEscaping("?%05=%05&%05=%05", "?\u0005=\u0005&\u0005=\u0005"); 567 | verifyEscaping("?%06=%06&%06=%06", "?\u0006=\u0006&\u0006=\u0006"); 568 | verifyEscaping("?%07=%07&%07=%07", "?\u0007=\u0007&\u0007=\u0007"); 569 | verifyEscaping("?%08=%08&%08=%08", "?\u0008=\u0008&\u0008=\u0008"); 570 | verifyEscaping("?%09=%09&%09=", "?\u0009=\u0009&\u0009=\u0009"); 571 | verifyEscaping("?=&=", "?\n=\n&\n=\n"); 572 | verifyEscaping("?%0B=%0B&%0B=%0B", "?\u000b=\u000b&\u000b=\u000b"); 573 | verifyEscaping("?%0C=%0C&%0C=", "?\u000c=\u000c&\u000c=\u000c"); 574 | verifyEscaping("?=&=", "?\r=\r&\r=\r"); 575 | verifyEscaping("?%0E=%0E&%0E=%0E", "?\u000e=\u000e&\u000e=\u000e"); 576 | verifyEscaping("?%0F=%0F&%0F=%0F", "?\u000f=\u000f&\u000f=\u000f"); 577 | verifyEscaping("?%10=%10&%10=%10", "?\u0010=\u0010&\u0010=\u0010"); 578 | verifyEscaping("?%11=%11&%11=%11", "?\u0011=\u0011&\u0011=\u0011"); 579 | verifyEscaping("?%12=%12&%12=%12", "?\u0012=\u0012&\u0012=\u0012"); 580 | verifyEscaping("?%13=%13&%13=%13", "?\u0013=\u0013&\u0013=\u0013"); 581 | verifyEscaping("?%14=%14&%14=%14", "?\u0014=\u0014&\u0014=\u0014"); 582 | verifyEscaping("?%15=%15&%15=%15", "?\u0015=\u0015&\u0015=\u0015"); 583 | verifyEscaping("?%16=%16&%16=%16", "?\u0016=\u0016&\u0016=\u0016"); 584 | verifyEscaping("?%17=%17&%17=%17", "?\u0017=\u0017&\u0017=\u0017"); 585 | verifyEscaping("?%18=%18&%18=%18", "?\u0018=\u0018&\u0018=\u0018"); 586 | verifyEscaping("?%19=%19&%19=%19", "?\u0019=\u0019&\u0019=\u0019"); 587 | verifyEscaping("?%1A=%1A&%1A=%1A", "?\u001a=\u001a&\u001a=\u001a"); 588 | verifyEscaping("?%1B=%1B&%1B=%1B", "?\u001b=\u001b&\u001b=\u001b"); 589 | verifyEscaping("?%1C=%1C&%1C=%1C", "?\u001c=\u001c&\u001c=\u001c"); 590 | verifyEscaping("?%1D=%1D&%1D=%1D", "?\u001d=\u001d&\u001d=\u001d"); 591 | verifyEscaping("?%1E=%1E&%1E=%1E", "?\u001e=\u001e&\u001e=\u001e"); 592 | verifyEscaping("?%1F=%1F&%1F=%1F", "?\u001f=\u001f&\u001f=\u001f"); 593 | verifyEscaping("?%20=%20&%20=", "?\u0020=\u0020&\u0020=\u0020"); 594 | verifyEscaping("?!=!&!=!", "?!=!&!=!"); 595 | verifyEscaping("?%22=%22&%22=%22", "?\"=\"&\"=\""); 596 | verifyEscaping("?#=%23&%23=%23", "?#=#&#=#"); 597 | verifyEscaping("?$=$&$=$", "?$=$&$=$"); 598 | verifyEscaping("?%25=%25&%25=%25", "?%=%&%=%"); 599 | verifyEscaping("?&=&&&=&", "?&=&&&=&"); 600 | verifyEscaping("?'='&'='", "?'='&'='"); 601 | verifyEscaping("?(=(&(=(", "?(=(&(=("); 602 | verifyEscaping("?)=)&)=)", "?)=)&)=)"); 603 | verifyEscaping("?*=*&*=*", "?*=*&*=*"); 604 | verifyEscaping("?+=+&+=+", "?+=+&+=+"); 605 | verifyEscaping("?,=,&,=,", "?,=,&,=,"); 606 | verifyEscaping("?-=-&-=-", "?-=-&-=-"); 607 | verifyEscaping("?.=.&.=.", "?.=.&.=."); 608 | verifyEscaping("?/=/&/=/", "?/=/&/=/"); 609 | verifyEscaping("?0=0&0=0", "?0=0&0=0"); 610 | verifyEscaping("?1=1&1=1", "?1=1&1=1"); 611 | verifyEscaping("?2=2&2=2", "?2=2&2=2"); 612 | verifyEscaping("?3=3&3=3", "?3=3&3=3"); 613 | verifyEscaping("?4=4&4=4", "?4=4&4=4"); 614 | verifyEscaping("?5=5&5=5", "?5=5&5=5"); 615 | verifyEscaping("?6=6&6=6", "?6=6&6=6"); 616 | verifyEscaping("?7=7&7=7", "?7=7&7=7"); 617 | verifyEscaping("?8=8&8=8", "?8=8&8=8"); 618 | verifyEscaping("?9=9&9=9", "?9=9&9=9"); 619 | verifyEscaping("?:=:&:=:", "?:=:&:=:"); 620 | verifyEscaping("?;=;&;=;", "?;=;&;=;"); 621 | verifyEscaping("?%3C=%3C&%3C=%3C", "?<=<&<=<"); 622 | verifyEscaping("?===&===", "?===&==="); 623 | verifyEscaping("?%3E=%3E&%3E=%3E", "?>=>&>=>"); 624 | verifyEscaping("??=?&?=?", "??=?&?=?"); 625 | verifyEscaping("?@=@&@=@", "?@=@&@=@"); 626 | verifyEscaping("?A=A&A=A", "?A=A&A=A"); 627 | verifyEscaping("?B=B&B=B", "?B=B&B=B"); 628 | verifyEscaping("?C=C&C=C", "?C=C&C=C"); 629 | verifyEscaping("?D=D&D=D", "?D=D&D=D"); 630 | verifyEscaping("?E=E&E=E", "?E=E&E=E"); 631 | verifyEscaping("?F=F&F=F", "?F=F&F=F"); 632 | verifyEscaping("?G=G&G=G", "?G=G&G=G"); 633 | verifyEscaping("?H=H&H=H", "?H=H&H=H"); 634 | verifyEscaping("?I=I&I=I", "?I=I&I=I"); 635 | verifyEscaping("?J=J&J=J", "?J=J&J=J"); 636 | verifyEscaping("?K=K&K=K", "?K=K&K=K"); 637 | verifyEscaping("?L=L&L=L", "?L=L&L=L"); 638 | verifyEscaping("?M=M&M=M", "?M=M&M=M"); 639 | verifyEscaping("?N=N&N=N", "?N=N&N=N"); 640 | verifyEscaping("?O=O&O=O", "?O=O&O=O"); 641 | verifyEscaping("?P=P&P=P", "?P=P&P=P"); 642 | verifyEscaping("?Q=Q&Q=Q", "?Q=Q&Q=Q"); 643 | verifyEscaping("?R=R&R=R", "?R=R&R=R"); 644 | verifyEscaping("?S=S&S=S", "?S=S&S=S"); 645 | verifyEscaping("?T=T&T=T", "?T=T&T=T"); 646 | verifyEscaping("?U=U&U=U", "?U=U&U=U"); 647 | verifyEscaping("?V=V&V=V", "?V=V&V=V"); 648 | verifyEscaping("?W=W&W=W", "?W=W&W=W"); 649 | verifyEscaping("?X=X&X=X", "?X=X&X=X"); 650 | verifyEscaping("?Y=Y&Y=Y", "?Y=Y&Y=Y"); 651 | verifyEscaping("?Z=Z&Z=Z", "?Z=Z&Z=Z"); 652 | verifyEscaping("?%5B=%5B&%5B=%5B", "?[=[&[=["); 653 | verifyEscaping("?%5C=%5C&%5C=%5C", "?\\=\\&\\=\\"); 654 | verifyEscaping("?%5D=%5D&%5D=%5D", "?]=]&]=]"); 655 | verifyEscaping("?%5E=%5E&%5E=%5E", "?^=^&^=^"); 656 | verifyEscaping("?_=_&_=_", "?_=_&_=_"); 657 | verifyEscaping("?%60=%60&%60=%60", "?`=`&`=`"); 658 | verifyEscaping("?a=a&a=a", "?a=a&a=a"); 659 | verifyEscaping("?b=b&b=b", "?b=b&b=b"); 660 | verifyEscaping("?c=c&c=c", "?c=c&c=c"); 661 | verifyEscaping("?d=d&d=d", "?d=d&d=d"); 662 | verifyEscaping("?e=e&e=e", "?e=e&e=e"); 663 | verifyEscaping("?f=f&f=f", "?f=f&f=f"); 664 | verifyEscaping("?g=g&g=g", "?g=g&g=g"); 665 | verifyEscaping("?h=h&h=h", "?h=h&h=h"); 666 | verifyEscaping("?i=i&i=i", "?i=i&i=i"); 667 | verifyEscaping("?j=j&j=j", "?j=j&j=j"); 668 | verifyEscaping("?k=k&k=k", "?k=k&k=k"); 669 | verifyEscaping("?l=l&l=l", "?l=l&l=l"); 670 | verifyEscaping("?m=m&m=m", "?m=m&m=m"); 671 | verifyEscaping("?n=n&n=n", "?n=n&n=n"); 672 | verifyEscaping("?o=o&o=o", "?o=o&o=o"); 673 | verifyEscaping("?p=p&p=p", "?p=p&p=p"); 674 | verifyEscaping("?q=q&q=q", "?q=q&q=q"); 675 | verifyEscaping("?r=r&r=r", "?r=r&r=r"); 676 | verifyEscaping("?s=s&s=s", "?s=s&s=s"); 677 | verifyEscaping("?t=t&t=t", "?t=t&t=t"); 678 | verifyEscaping("?u=u&u=u", "?u=u&u=u"); 679 | verifyEscaping("?v=v&v=v", "?v=v&v=v"); 680 | verifyEscaping("?w=w&w=w", "?w=w&w=w"); 681 | verifyEscaping("?x=x&x=x", "?x=x&x=x"); 682 | verifyEscaping("?y=y&y=y", "?y=y&y=y"); 683 | verifyEscaping("?z=z&z=z", "?z=z&z=z"); 684 | verifyEscaping("?%7B=%7B&%7B=%7B", "?{={&{={"); 685 | verifyEscaping("?%7C=%7C&%7C=%7C", "?|=|&|=|"); 686 | verifyEscaping("?%7D=%7D&%7D=%7D", "?}=}&}=}"); 687 | verifyEscaping("?~=~&~=~", "?~=~&~=~"); 688 | verifyEscaping("?%7F=%7F&%7F=%7F", "?\u007f=\u007f&\u007f=\u007f"); 689 | verifyEscaping("?%C2%80=%C2%80&%C2%80=%C2%80", "?\u0080=\u0080&\u0080=\u0080"); 690 | 691 | verifyEscaping("?%2e", "?%2e"); 692 | verifyEscaping("?%25zz", "?%zz"); 693 | verifyEscaping("?+==", "?+=="); 694 | } 695 | 696 | @Test public void minimalEscapeFragment() { 697 | verifyEscaping("#%00", "#\u0000"); 698 | verifyEscaping("#%01", "#\u0001"); 699 | verifyEscaping("#%02", "#\u0002"); 700 | verifyEscaping("#%03", "#\u0003"); 701 | verifyEscaping("#%04", "#\u0004"); 702 | verifyEscaping("#%05", "#\u0005"); 703 | verifyEscaping("#%06", "#\u0006"); 704 | verifyEscaping("#%07", "#\u0007"); 705 | verifyEscaping("#%08", "#\u0008"); 706 | verifyEscaping("#", "#\u0009"); 707 | verifyEscaping("#", "#\n"); 708 | verifyEscaping("#%0B", "#\u000b"); 709 | verifyEscaping("#", "#\u000c"); 710 | verifyEscaping("#", "#\r"); 711 | verifyEscaping("#%0E", "#\u000e"); 712 | verifyEscaping("#%0F", "#\u000f"); 713 | verifyEscaping("#%10", "#\u0010"); 714 | verifyEscaping("#%11", "#\u0011"); 715 | verifyEscaping("#%12", "#\u0012"); 716 | verifyEscaping("#%13", "#\u0013"); 717 | verifyEscaping("#%14", "#\u0014"); 718 | verifyEscaping("#%15", "#\u0015"); 719 | verifyEscaping("#%16", "#\u0016"); 720 | verifyEscaping("#%17", "#\u0017"); 721 | verifyEscaping("#%18", "#\u0018"); 722 | verifyEscaping("#%19", "#\u0019"); 723 | verifyEscaping("#%1A", "#\u001a"); 724 | verifyEscaping("#%1B", "#\u001b"); 725 | verifyEscaping("#%1C", "#\u001c"); 726 | verifyEscaping("#%1D", "#\u001d"); 727 | verifyEscaping("#%1E", "#\u001e"); 728 | verifyEscaping("#%1F", "#\u001f"); 729 | verifyEscaping("#", "#\u0020"); 730 | verifyEscaping("#!", "#!"); 731 | verifyEscaping("#%22", "#\""); 732 | verifyEscaping("#%23", "##"); 733 | verifyEscaping("#$", "#$"); 734 | verifyEscaping("#%25", "#%"); 735 | verifyEscaping("#&", "#&"); 736 | verifyEscaping("#'", "#'"); 737 | verifyEscaping("#(", "#("); 738 | verifyEscaping("#)", "#)"); 739 | verifyEscaping("#*", "#*"); 740 | verifyEscaping("#+", "#+"); 741 | verifyEscaping("#,", "#,"); 742 | verifyEscaping("#-", "#-"); 743 | verifyEscaping("#.", "#."); 744 | verifyEscaping("#/", "#/"); 745 | verifyEscaping("#0", "#0"); 746 | verifyEscaping("#1", "#1"); 747 | verifyEscaping("#2", "#2"); 748 | verifyEscaping("#3", "#3"); 749 | verifyEscaping("#4", "#4"); 750 | verifyEscaping("#5", "#5"); 751 | verifyEscaping("#6", "#6"); 752 | verifyEscaping("#7", "#7"); 753 | verifyEscaping("#8", "#8"); 754 | verifyEscaping("#9", "#9"); 755 | verifyEscaping("#:", "#:"); 756 | verifyEscaping("#;", "#;"); 757 | verifyEscaping("#%3C", "#<"); 758 | verifyEscaping("#=", "#="); 759 | verifyEscaping("#%3E", "#>"); 760 | verifyEscaping("#?", "#?"); 761 | verifyEscaping("#@", "#@"); 762 | verifyEscaping("#A", "#A"); 763 | verifyEscaping("#B", "#B"); 764 | verifyEscaping("#C", "#C"); 765 | verifyEscaping("#D", "#D"); 766 | verifyEscaping("#E", "#E"); 767 | verifyEscaping("#F", "#F"); 768 | verifyEscaping("#G", "#G"); 769 | verifyEscaping("#H", "#H"); 770 | verifyEscaping("#I", "#I"); 771 | verifyEscaping("#J", "#J"); 772 | verifyEscaping("#K", "#K"); 773 | verifyEscaping("#L", "#L"); 774 | verifyEscaping("#M", "#M"); 775 | verifyEscaping("#N", "#N"); 776 | verifyEscaping("#O", "#O"); 777 | verifyEscaping("#P", "#P"); 778 | verifyEscaping("#Q", "#Q"); 779 | verifyEscaping("#R", "#R"); 780 | verifyEscaping("#S", "#S"); 781 | verifyEscaping("#T", "#T"); 782 | verifyEscaping("#U", "#U"); 783 | verifyEscaping("#V", "#V"); 784 | verifyEscaping("#W", "#W"); 785 | verifyEscaping("#X", "#X"); 786 | verifyEscaping("#Y", "#Y"); 787 | verifyEscaping("#Z", "#Z"); 788 | verifyEscaping("#%5B", "#["); 789 | verifyEscaping("#%5C", "#\\"); 790 | verifyEscaping("#%5D", "#]"); 791 | verifyEscaping("#%5E", "#^"); 792 | verifyEscaping("#_", "#_"); 793 | verifyEscaping("#%60", "#`"); 794 | verifyEscaping("#a", "#a"); 795 | verifyEscaping("#b", "#b"); 796 | verifyEscaping("#c", "#c"); 797 | verifyEscaping("#d", "#d"); 798 | verifyEscaping("#e", "#e"); 799 | verifyEscaping("#f", "#f"); 800 | verifyEscaping("#g", "#g"); 801 | verifyEscaping("#h", "#h"); 802 | verifyEscaping("#i", "#i"); 803 | verifyEscaping("#j", "#j"); 804 | verifyEscaping("#k", "#k"); 805 | verifyEscaping("#l", "#l"); 806 | verifyEscaping("#m", "#m"); 807 | verifyEscaping("#n", "#n"); 808 | verifyEscaping("#o", "#o"); 809 | verifyEscaping("#p", "#p"); 810 | verifyEscaping("#q", "#q"); 811 | verifyEscaping("#r", "#r"); 812 | verifyEscaping("#s", "#s"); 813 | verifyEscaping("#t", "#t"); 814 | verifyEscaping("#u", "#u"); 815 | verifyEscaping("#v", "#v"); 816 | verifyEscaping("#w", "#w"); 817 | verifyEscaping("#x", "#x"); 818 | verifyEscaping("#y", "#y"); 819 | verifyEscaping("#z", "#z"); 820 | verifyEscaping("#%7B", "#{"); 821 | verifyEscaping("#%7C", "#|"); 822 | verifyEscaping("#%7D", "#}"); 823 | verifyEscaping("#~", "#~"); 824 | verifyEscaping("#%7F", "#\u007f"); 825 | verifyEscaping("#%C2%80", "#\u0080"); 826 | verifyEscaping("#%C2%81", "#\u0081"); 827 | verifyEscaping("#%C2%82", "#\u0082"); 828 | verifyEscaping("#%C2%83", "#\u0083"); 829 | verifyEscaping("#%C2%84", "#\u0084"); 830 | 831 | verifyEscaping("#%2e", "#%2e"); 832 | verifyEscaping("#%25zz", "#%zz"); 833 | } 834 | 835 | private void verifyEscaping(String expected, String input) { 836 | expected = "http://host" + expected; 837 | input = "http://host" + input; 838 | try { 839 | new URI(expected); 840 | } catch (URISyntaxException e) { 841 | throw new AssertionError(e); 842 | } 843 | assertEquals(expected, Urls.escape(input)); 844 | } 845 | } --------------------------------------------------------------------------------