105 lines
4.4 KiB
HTML
105 lines
4.4 KiB
HTML
<!DOCTYPE html>
<html>
<head>
<script src="../../resources/js-test-pre.js"></script>
<script src="resources/utilities.js"></script>
</head>
<body>
<script>
description("Canonicalization of host names.");

// Table of host canonicalization test vectors. Each entry is a pair:
//   [0] the raw host text to place into an http:// URL,
//   [1] the host text expected after canonicalization.
// NOTE(review): the vectors are spliced into source strings handed to
// shouldBe(), which presumably evaluates them as JavaScript expressions
// (see js-test-pre.js) -- that would be why backslashes below are
// double-escaped. Confirm against the harness before changing escaping.
var cases = [
    // Basic canonicalization, uppercase should be converted to lowercase.
    ["GoOgLe.CoM", "google.com"],
    // Spaces and some other characters should be escaped.
    ["Goo%20 goo%7C|.com", "goo%20%20goo%7C%7C.com"],
    // Exciting different types of spaces!
    ["GOO\u00a0\u3000goo.com", "goo%20%20goo.com"],
    // Other types of space (no-break, zero-width, zero-width-no-break) are
    // name-prepped away to nothing.
    ["GOO\u200b\u2060\ufeffgoo.com", "googoo.com"],
    // Ideographic full stop (full-width period for Chinese, etc.) should be
    // treated as a dot.
    ["www.foo\u3002" + "bar.com", "www.foo.bar.com"],
    // Invalid unicode characters should fail...
    // ...In wide input, ICU will barf and we'll end up with the input as
    // escaped UTF-8 (the invalid character should be replaced with the
    // replacement character).
    ["\ufdd0zyx.com", "%EF%BF%BDzyx.com"],
    // ...This is the same as the previous case but with the character escaped.
    ["%ef%b7%90zyx.com", "%EF%BF%BDzyx.com"],
    // Test name prepping, fullwidth input should be converted to ASCII and NOT
    // IDN-ized. This is "Go" in fullwidth UTF-8/UTF-16.
    ["\uff27\uff4f.com", "go.com"],
    // Test that fullwidth escaped values are properly name-prepped,
    // then converted or rejected.
    // ...%41 in fullwidth = 'A' (also as escaped UTF-8 input)
    ["\uff05\uff14\uff11.com", "a.com"],
    ["%ef%bc%85%ef%bc%94%ef%bc%91.com", "a.com"],
    // ...%00 in fullwidth should fail (also as escaped UTF-8 input)
    ["\uff05\uff10\uff10.com", "%00.com"],
    ["%ef%bc%85%ef%bc%90%ef%bc%90.com", "%00.com"],
    // Basic IDN support, UTF-8 and UTF-16 input should be converted to IDN
    ["\u4f60\u597d\u4f60\u597d", "xn--6qqa088eba"],
    // Mixed UTF-8 and escaped UTF-8 (narrow case) and UTF-16 and escaped
    // UTF-8 (wide case). The output should be equivalent to the true wide
    // character input above.
    ["%E4%BD%A0%E5%A5%BD\u4f60\u597d", "xn--6qqa088eba"],
    // Invalid escaped characters should fail and the percents should be
    // escaped.
    ["%zz%66%a", "%25zzf%25a"],
    // If we get an invalid character that has been escaped.
    ["%25", "%25"],
    ["hello%00", "hello%00"],
    // Escaped numbers should be treated like IP addresses if they are.
    ["%30%78%63%30%2e%30%32%35%30.01", "192.168.0.1"],
    ["%30%78%63%30%2e%30%32%35%30.01%2e", "192.168.0.1"],
    // Invalid escaping should trigger the regular host error handling.
    ["%3g%78%63%30%2e%30%32%35%30%2E.01", "%253gxc0.0250..01"],
    // Something that isn't exactly an IP should get treated as a host and
    // spaces escaped.
    ["192.168.0.1 hello", "192.168.0.1%20hello"],
    // Fullwidth and escaped UTF-8 fullwidth should still be treated as IP.
    // These are "0Xc0.0250.01" in fullwidth.
    ["\uff10\uff38\uff43\uff10\uff0e\uff10\uff12\uff15\uff10\uff0e\uff10\uff11", "192.168.0.1"],
    // Broken IP addresses get marked as such.
    ["192.168.0.257", "192.168.0.257"],
    ["[google.com]", "[google.com]"],
    // A Cyrillic letter followed by ( should return punycode with the (
    // escaped before the punycode string was created. I.e. if ( is escaped
    // after the punycode is created we would get xn--%28-8tb (incorrect).
    ["\u0442(", "xn--%28-7ed"],
    ["go\\\\@ogle.com","go/@ogle.com"],
    ["go/@ogle.com","go/@ogle.com"],
    ["www.lookout.net::==80::==443::","www.lookout.net::%3D%3D80::%3D%3D443:"],
    ["www.lookout.net::80::443","www.lookout.net::80::443"],
    // From http://eaea.sirdarckcat.net/uritest.html
    ["\\./","./"],
    ["//:@/","/"],
    ["\\google.com/foo","google.com/foo"],
    ["\\\\google.com/foo","google.com/foo"],
    ["//asdf@/","asdf@/"],
    ["//:81",":81"],
    ["://","://"],
    ["c:","c"],
    ["xxxx:","xxxx"],
    [".:.",".:."],
    ["////@google.com/","google.com/"],
    ["@google.com","google.com"],
    ["quip\u2010apple.com", "xn--quipapple-y79d.com"],
    ["quip\u2011apple.com", "xn--quipapple-y79d.com"],
    ["quip\u2212apple.com", "xn--quipapple-tf4e.com"],
    ["iclou\uA771.com", "xn--iclou-rl3s.com"],
    ["g\u1ECD\u1ECDgle.com", "xn--ggle-gx5aa.com"]
];

// Wrap every vector as http://<host>/ and hand shouldBe() two source strings:
// a canonicalize(...) call on the raw URL and the quoted expected URL.
// The variables are declared with var (matching the existing loop index)
// rather than assigned as implicit globals, so the script no longer relies on
// sloppy-mode global creation while the names stay script-global as before.
for (var i = 0; i < cases.length; ++i) {
    var test_vector = cases[i][0];
    var expected_result = cases[i][1];
    shouldBe("canonicalize('http://" + test_vector + "/')",
             "'http://" + expected_result + "/'");
}
</script>
<script src="../../resources/js-test-post.js"></script>
</body>
</html>