302 lines
9.2 KiB
HTML
302 lines
9.2 KiB
HTML
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
|
|
|
<html>
|
|
|
|
<head>
|
|
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
|
|
<title>test of JavaScript URI encoding and decoding methods</title>
|
|
</head>
|
|
|
|
<body>
|
|
|
|
<script type="text/javascript">
|
|
|
|
// Only talk to the test harness when the page is actually running under it.
// NOTE(review): dumpAsText() presumably asks the harness for a plain-text dump - confirm against the harness docs.
if (window.testRunner) {
    testRunner.dumpAsText();
}
|
|
|
|
// --------
|
|
|
|
// Helper functions.
|
|
|
|
// Returns the uppercase hexadecimal digit for a value in the range 0-15.
//
// The result is always a string, so digits can be concatenated directly
// (hexDigit(1) + hexDigit(2) is "12") instead of being added numerically
// when both values are below 10.
function hexDigit(number)
{
    if (number >= 10)
        return String.fromCharCode(number + 55); // 10 + 55 is the character code of "A".
    return String(number);
}
|
|
|
|
// Returns a double-quoted, ASCII-only rendering of the string s for use in
// failure messages. The empty string is reported as the words "empty string".
//
// Backslash, double quote, newline, carriage return and tab use their usual
// backslash escapes. Printable ASCII (0x20 through 0x7E) is copied as is.
// Every other code unit up to 0xFF becomes \xNN, and anything larger becomes
// \uNNNN.
function printable(s)
{
    if (s === "")
        return "empty string";
    var p = "";
    for (var i = 0; i < s.length; i++) {
        var c = s.charAt(i);
        var cc = s.charCodeAt(i);
        if (c === "\\") {
            p += "\\\\";
        } else if (c === "\"") {
            p += "\\\"";
        } else if (c === "\n") {
            p += "\\n";
        } else if (c === "\r") {
            p += "\\r";
        } else if (c === "\t") {
            p += "\\t";
        } else if (cc >= 0x20 && cc < 0x7F) {
            // The lower bound is 0x20 (space). A decimal 20 here would let the
            // control characters 0x14-0x1F through unescaped.
            p += c;
        } else if (cc <= 0xFF) {
            p += "\\x" + hexDigit(cc >> 4) + hexDigit(cc & 0xF);
        } else {
            // charCodeAt() yields UTF-16 code units, so the value never exceeds
            // 0xFFFF and four hex digits are always enough.
            p += "\\u" + hexDigit((cc >> 12) & 0xF) + hexDigit((cc >> 8) & 0xF) + hexDigit((cc >> 4) & 0xF) + hexDigit(cc & 0xF);
        }
    }
    return "\"" + p + "\"";
}
|
|
|
|
// Returns the percent-encoded UTF-8 form expected from encodeURI and
// encodeURIComponent for the character code c.
//
// Gecko produces UTF-8 here and WinIE 6 does not. UTF-8 is the form that
// actually works in URLs, so JavaScriptCore follows Gecko on this point and
// WinIE on most other things.
//
// Only ASCII plus a fixed list of sample code points is supported. For any
// other code an alert is raised and "?" is returned.
function encodedCharacter(c)
{
    // Hand-picked cases, in place of a full JavaScript UTF-8 encoder.
    var knownEncodings = [
        [0x80, "%C2%80"],
        [0x7FF, "%DF%BF"],
        [0x800, "%E0%A0%80"],
        [0x2022, "%E2%80%A2"],
        [0xD7FF, "%ED%9F%BF"],
        [0xD800, "%ED%A0%80"],
        [0xE000, "%EE%80%80"],
        [0xFFFC, "%EF%BF%BC"],
        [0xFFFD, "%EF%BF%BD"]
    ];
    for (var i = 0; i < knownEncodings.length; i++) {
        if (knownEncodings[i][0] === c)
            return knownEncodings[i][1];
    }

    if (c < 0 || c > 0x7F) {
        window.alert("encodedCharacter doesn't know how to escape character code " + c);
        return "?";
    }

    // ASCII: "%" followed by the high and low hex digits of the code.
    var highNibble = c >> 4;
    var lowNibble = c - (highNibble << 4);
    return "%" + hexDigit(highNibble) + hexDigit(lowNibble);
}
|
|
|
|
// Returns the form expected from escape() for the character code c.
//
// This follows Win IE rather than Gecko's UTF-8: codes up to 255 are written
// as a single Latin-1 %XX sequence and everything above as %uXXXX.
//
// Only ASCII plus a fixed list of sample code points is supported. For any
// other code an alert is raised and "?" is returned.
function escapedCharacter(c)
{
    var knownEscapes = [
        [0x80, "%80"],
        [0x7FF, "%u07FF"],
        [0x800, "%u0800"],
        [0x2022, "%u2022"],
        [0xD7FF, "%uD7FF"],
        [0xD800, "%uD800"],
        [0xE000, "%uE000"],
        [0xFFFC, "%uFFFC"],
        [0xFFFD, "%uFFFD"]
    ];
    for (var i = 0; i < knownEscapes.length; i++) {
        if (knownEscapes[i][0] === c)
            return knownEscapes[i][1];
    }

    if (c < 0 || c > 0x7F) {
        window.alert("escapedCharacter doesn't know how to escape character code " + c);
        return "?";
    }

    // ASCII: "%" followed by the high and low hex digits of the code.
    var highNibble = c >> 4;
    var lowNibble = c - (highNibble << 4);
    return "%" + hexDigit(highNibble) + hexDigit(lowNibble);
}
|
|
|
|
// Calls f once for each position in the string s, in order, passing the
// character code at that position (as reported by charCodeAt).
function forEachCharacterCode(f, s)
{
    var index = 0;
    while (index < s.length) {
        f(s.charCodeAt(index));
        index++;
    }
}
|
|
|
|
// Invokes the function named by functionName with the single argument
// parameter and returns its result. If the call throws, the string
// "exception" is returned instead, so callers can compare it against an
// expected value.
//
// The function is looked up with eval. functionName must therefore only ever
// be a trusted name supplied by this test, never external input.
function call(functionName, parameter)
{
    // Declared locally: as an undeclared assignment this leaked a global and
    // throws a ReferenceError in strict mode.
    var result;
    try {
        result = eval(functionName + "(parameter)");
    } catch (e) {
        result = "exception";
    }
    return result;
}
|
|
|
|
// --------
|
|
|
|
// Build up tables with expected results.
|
|
|
|
// Overrides for expected results, keyed by "functionName(parameter)".
var expectedResult = {};
|
|
|
|
// Records that calling the function named f on the character with code c is
// expected to return that character unchanged.
function addExpectedNonEscaped(f, c)
{
    var character = String.fromCharCode(c);
    var key = f + "(" + character + ")";
    expectedResult[key] = character;
}
|
|
|
|
// Marks character code c as left untouched by escape, encodeURI and
// encodeURIComponent alike.
function addNoEscape(c)
{
    var functionNames = ["escape", "encodeURI", "encodeURIComponent"];
    for (var i = 0; i < functionNames.length; i++)
        addExpectedNonEscaped(functionNames[i], c);
}
|
|
|
|
// Marks character code c as left untouched by escape only.
function addEscapeNoEscape(c)
{
    var functionName = "escape";
    addExpectedNonEscaped(functionName, c);
}
|
|
|
|
// Marks character code c as left untouched by both encodeURI and
// encodeURIComponent.
function addURIComponentNoEscape(c)
{
    var functionNames = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < functionNames.length; i++)
        addExpectedNonEscaped(functionNames[i], c);
}
|
|
|
|
// Marks character code c as left untouched by encodeURI, and records that
// decodeURI is expected to leave its escape sequence undecoded, in both the
// upper-case and the lower-case spelling.
function addURINoEscape(c)
{
    addExpectedNonEscaped("encodeURI", c);

    var upperKey = "decodeURI(" + escapedCharacter(c) + ")";
    expectedResult[upperKey] = encodedCharacter(c);

    var lowerKey = "decodeURI(" + escapedCharacter(c).toLowerCase() + ")";
    expectedResult[lowerKey] = encodedCharacter(c).toLowerCase();
}
|
|
|
|
// Register the characters each function is expected to pass through
// unchanged. The sets are applied in the order listed.
(function ()
{
    var passThroughSets = [
        [addNoEscape, "*0123456789-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"],
        [addEscapeNoEscape, "+/"],
        [addURINoEscape, "@#$&+,/:;=?"],
        [addURIComponentNoEscape, "!'()~"]
    ];
    for (var i = 0; i < passThroughSets.length; i++)
        forEachCharacterCode(passThroughSets[i][0], passThroughSets[i][1]);
})();

// WinIE 6's escape function does not escape @, although Gecko's does.
expectedResult["escape(@)"] = "@";
|
|
|
|
// --------
|
|
|
|
// Run tests.
|
|
|
|
// Number of failed checks so far; test() increments it and the summary at the end reports it.
var failureCount = 0;
|
|
|
|
// Runs the function named functionName on parameter and compares the outcome
// with the expected value. An entry in expectedResult for this exact
// function/parameter pair takes precedence over desiredResult. On a mismatch
// a paragraph describing the failure is written to the document and
// failureCount is incremented.
function test(functionName, parameter, desiredResult)
{
    var override = expectedResult[functionName + "(" + parameter + ")"];
    var expected = override ? override : desiredResult;

    var actual = call(functionName, parameter);
    if (actual == expected)
        return;

    var message = "called " + functionName + " on " + printable(parameter) + " and got " + printable(actual) + " instead of " + printable(expected);
    document.writeln("<p>" + message + "</p>");
    failureCount += 1;
}
|
|
|
|
// Checks escape against escapeExpected, then encodeURI and
// encodeURIComponent against encodeExpected, all for the same parameter.
function testEscapeAndEncode(parameter, escapeExpected, encodeExpected)
{
    test("escape", parameter, escapeExpected);

    var encoders = ["encodeURI", "encodeURIComponent"];
    for (var i = 0; i < encoders.length; i++)
        test(encoders[i], parameter, encodeExpected);
}
|
|
|
|
// Checks that unescape applied to parameter gives expected.
function testUnescape(parameter, expected)
{
    var functionName = "unescape";
    test(functionName, parameter, expected);
}
|
|
|
|
// Checks that decodeURI and decodeURIComponent applied to parameter both give
// expected. An expectation of U+FFFE or U+FFFF is replaced by U+FFFD before
// checking.
function testDecode(parameter, expected)
{
    var isNoncharacter = expected == "\uFFFE" || expected == "\uFFFF";
    var decodeExpected = isNoncharacter ? "\uFFFD" : expected;

    var decoders = ["decodeURI", "decodeURIComponent"];
    for (var i = 0; i < decoders.length; i++)
        test(decoders[i], parameter, decodeExpected);
}
|
|
|
|
// Runs the unescape check and then the decodeURI/decodeURIComponent checks on
// the same parameter, each with its own expected value.
function testUnescapeAndDecode(parameter, expectedUnescape, expectedDecode)
{
    // unescape first ...
    testUnescape(parameter, expectedUnescape);

    // ... then both decode functions.
    testDecode(parameter, expectedDecode);
}
|
|
|
|
// Runs the full round trip for the character with code c: escaping and
// encoding the character itself, then unescaping and decoding the expected
// sequences in both their original and lower-case spellings.
function testCharacter(c)
{
    var character = String.fromCharCode(c);
    var escapedForm = escapedCharacter(c);
    var encodedForm = encodedCharacter(c);

    testEscapeAndEncode(character, escapedForm, encodedForm);

    var escapedSpellings = [escapedForm, escapedForm.toLowerCase()];
    for (var i = 0; i < escapedSpellings.length; i++)
        testUnescape(escapedSpellings[i], character);

    var encodedSpellings = [encodedForm, encodedForm.toLowerCase()];
    for (var j = 0; j < encodedSpellings.length; j++)
        testDecode(encodedSpellings[j], character);
}
|
|
|
|
// Every character code from 0 through 128 inclusive.
var c = 0;
while (c <= 128) {
    testCharacter(c);
    c++;
}

// Selected larger code points, tested in this order.
(function ()
{
    var sampleCodes = [0x7FF, 0x800, 0x2022, 0xD7FF, 0xE000, 0xFFFC, 0xFFFD];
    for (var i = 0; i < sampleCodes.length; i++)
        testCharacter(sampleCodes[i]);
})();
|
|
|
|
// These tests are currently turned off because it's not yet entirely clear what correct behavior
|
|
// is for these cases. Gecko seems to reject values in the surrogate range entirely, yet turns
|
|
// U+FFFE and U+FFFF into U+FFFD, even though Unicode documentation says to treat both the same.
|
|
// And all the JavaScript engines seem to use UTF-16 in a way that prevents characters greater
|
|
// than U+FFFF (outside the BMP) from working properly.
|
|
|
|
//testCharacter(0xD800);
|
|
//testCharacter(0xDBFF);
|
|
//testCharacter(0xDC00);
|
|
//testCharacter(0xDFFF);
|
|
//testCharacter(0xFFFE);
|
|
//testCharacter(0xFFFF);
|
|
//testCharacter(0x10000);
|
|
|
|
// Incomplete or malformed escape sequences, raw non-ASCII input, and UTF-8
// sequences that are complete, truncated, or mixed with a raw character.
// Each row is [input, expected unescape result, expected decode result].
(function ()
{
    var charC2 = String.fromCharCode(0xC2);
    var char80 = String.fromCharCode(0x80);
    var charD800 = String.fromCharCode(0xD800);

    var cases = [
        ["%", "%", "exception"],
        ["%0", "%0", "exception"],
        ["%a", "%a", "exception"],
        ["%u", "%u", "exception"],
        ["%xx", "%xx", "exception"],
        ["%u004", "%u004", "exception"],
        ["%u0041", "A", "exception"],
        ["%uxxxx", "%uxxxx", "exception"],

        [char80, char80, char80],
        [charD800, charD800, charD800],

        ["%C2%80", charC2 + char80, char80],
        ["%C2", charC2, "exception"],
        ["%C2" + char80, charC2 + char80, "exception"]
    ];

    for (var i = 0; i < cases.length; i++)
        testUnescapeAndDecode(cases[i][0], cases[i][1], cases[i][2]);
})();
|
|
|
|
// The characters below have to be literal because String.fromCharCode will make a single character
|
|
// and the \u syntax won't allow us to specify characters with Unicode values higher than U+FFFF.
|
|
// For most JavaScript engines, this will turn into two characters because they use UTF-16
|
|
// instead of Unicode; it's not clear to me at the moment if the standard asks for this UTF-16
|
|
// behavior, forbids it, or doesn't say either way.
|
|
// Each pair below uses one literal character from outside the BMP. escape is
// expected to produce two %u sequences (the UTF-16 surrogate pair), while
// encodeURI and encodeURIComponent are expected to produce four
// percent-encoded UTF-8 bytes.
testEscapeAndEncode("𐀀", "%uD800%uDC00", "%F0%90%80%80");
|
|
// In the reverse direction, unescape is expected to give one character per
// %XX sequence, while decodeURI and decodeURIComponent give back the literal
// character.
testUnescapeAndDecode("%F0%90%80%80", "\xF0\x90\x80\x80", "𐀀");
|
|
testEscapeAndEncode("𦏵", "%uD858%uDFF5", "%F0%A6%8F%B5");
|
|
testUnescapeAndDecode("%F0%A6%8F%B5", "\xF0\xA6\x8F\xB5", "𦏵");
|
|
testEscapeAndEncode("", "%uD87F%uDFFF", "%F0%AF%BF%BF");
|
|
testUnescapeAndDecode("%F0%AF%BF%BF", "\xF0\xAF\xBF\xBF", "");
|
|
|
|
// --------
|
|
|
|
// Summarize.
|
|
|
|
// Report the overall outcome: the failure count if any check failed,
// otherwise a fixed success message.
var failuresMessage = failureCount ? failureCount + " tests failed." : "No failures.";
document.writeln("<p>Testing complete. " + failuresMessage + "</p>");
|
|
|
|
// --------
|
|
|
|
</script>
|
|
|
|
</body>
|
|
|
|
</html>
|