haikuwebkit/LayoutTests/js/regexp-unicode-expected.txt

Test for unicode regular expression processing

On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".


PASS "a".match(/a/u)[0].length is 1
PASS "a".match(/A/ui)[0].length is 1
PASS "a".match(/a/u)[0].length is 1
PASS "a".match(/A/iu)[0].length is 1
PASS "Ȓ".match(/Ȓ/u)[0].length is 1
PASS "Ȓ".match(/Ȓ/u)[0].length is 1
PASS "ሴ".match(/ሴ/u)[0].length is 1
PASS "ሴ".match(/ሴ/u)[0].length is 1
PASS "⪼".match(/⪼/u)[0].length is 1
PASS "㿭".match(/㿭/u)[0].length is 1
PASS "𒍅".match(/𒍅/u)[0].length is 2
PASS "𒍅".match(/𒍅/u)[0].length is 2
PASS "𝌆".match(/𝌆/u)[0].length is 2
PASS /𐑏/u.test("𐑏") is true
PASS /𐑏/u.test("𐑏") is true
PASS "𝌆".match(/𝌆/u)[0].length is 2
PASS /(𐀀|𐐀|𐐩)/u.test("𐐀") is true
PASS "𐄣".match(/a|𐄣|b/u)[0].length is 2
PASS "b".match(/a|𐄣|b/u)[0].length is 1
PASS /(?:a|𐄣|b)x/u.test("𐄣") is false
PASS /(?:a|𐄣|b)x/u.test("𐄣x") is true
PASS /(?:a|𐄣|b)x/u.test("b") is false
PASS /(?:a|𐄣|b)x/u.test("bx") is true
PASS "a𐄣x".match(/a𐄣b|a𐄣x/u)[0].length is 4
PASS /(𐀀|𐐀|𐐩)x/ui.test("𐐀x") is true
PASS /(𐀀|𐐀|𐐩)x/ui.test("𐐩x") is true
PASS /(𐀀|𐐀|𐐩)x/ui.test("𐐁x") is true
PASS /(𐀀|𐐀|𐐩)x/ui.test("𐐨x") is true
PASS "𐐩".match(/a|𐐁|b/iu)[0].length is 2
PASS "B".match(/a|𐄣|b/iu)[0].length is 1
PASS /(?:A|𐄣|b)x/iu.test("𐄣") is false
PASS /(?:A|𐄣|b)x/iu.test("𐄣x") is true
PASS /(?:A|𐄣|b)x/iu.test("b") is false
PASS /(?:A|𐄣|b)x/iu.test("bx") is true
PASS "a𐄣X".match(/a𐄣b|a𐄣x/iu)[0].length is 4
PASS "Ťx".match(/ťx/iu)[0].length is 2
PASS /\w/iu.test("ſ") is true
PASS /\w/iu.test("K") is true
PASS /\W/iu.test("ſ") is false
PASS /\W/iu.test("K") is false
PASS /[\w\d]/iu.test("ſ") is true
PASS /[\w\d]/iu.test("K") is true
PASS /[^\w\d]/iu.test("ſ") is false
PASS /[^\w\d]/iu.test("K") is false
PASS /[\W\d]/iu.test("ſ") is false
PASS /[\W\d]/iu.test("K") is false
PASS /[^\W\d]/iu.test("ſ") is true
PASS /[^\W\d]/iu.test("K") is true
PASS /\w/iu.test("S") is true
PASS /\w/iu.test("K") is true
PASS /\W/iu.test("S") is false
PASS /\W/iu.test("K") is false
PASS /[\w\d]/iu.test("S") is true
PASS /[\w\d]/iu.test("K") is true
PASS /[^\w\d]/iu.test("S") is false
PASS /[^\w\d]/iu.test("K") is false
PASS /[\W\d]/iu.test("S") is false
PASS /[\W\d]/iu.test("K") is false
PASS /[^\W\d]/iu.test("S") is true
PASS /[^\W\d]/iu.test("K") is true
PASS "Grasſoden is old German for grass".match(/.*?\Bs\u017foden/iu)[0] is "Grasſoden"
PASS "Grasſoden is old German for grass".match(/.*?\B\u017foden/iu)[0] is "Grasſoden"
PASS "Grasſoden is old German for grass".match(/.*?\Boden/iu)[0] is "Grasſoden"
PASS "Grasſoden is old German for grass".match(/.*?\Bden/iu)[0] is "Grasſoden"
PASS "Water freezes at 273K which is 0C.".split(/\b\s/iu) is ["Water","freezes","at","273K","which","is","0C."]
PASS "𝌆".match(/^.$/u)[0].length is 2
PASS "It is 78°".match(/.*/u)[0].length is 9
PASS stringWithDanglingFirstSurrogate.match(/.*/u)[0].length is 3
PASS stringWithDanglingSecondSurrogate.match(/.*/u)[0].length is 3
PASS "𝌆".match(/[𝌆a]/)[0].length is 1
PASS "𝌆".match(/[a𝌆]/u)[0].length is 2
PASS "𝌆".match(/[𝌆a]/u)[0].length is 2
PASS "𝌆".match(/[a-𝌆]/)[0].length is 1
PASS "𝌆".match(/[a-𝌆]/u)[0].length is 2
PASS "X".match(/[ -𐑏]/u)[0].length is 1
PASS "က".match(/[ -𐑏]/u)[0].length is 1
PASS "𐐧".match(/[ -𐑏]/u)[0].length is 2
PASS re1.test("Z") is false
PASS re1.test("က") is false
PASS re1.test("𐐀") is false
PASS re2.test("A") is true
PASS re2.test("") is false
PASS re2.test("𒍅") is true
PASS /[𐰁<>#<23>]/u.exec("𐰁").toString() is "𐰁"
PASS /[<5B>𐰁<EFBFBD>]/u.exec("𐰁").toString() is "𐰁"
PASS /[<5B>#<23>𐰁]/u.exec("𐰁").toString() is "𐰁"
PASS /[<5B>𐰁<EFBFBD>]/u.exec("𐰁").toString() is "𐰁"
PASS /[𐰁<>#<23>]{2}/u.exec("𐰁") is null
PASS /[<5B>𐰁<EFBFBD>]{2}/u.exec("𐰁") is null
PASS /[<5B>#<23>𐰁]{2}/u.exec("𐰁") is null
PASS /[<5B>𐰁<EFBFBD>]{2}/u.exec("𐰁") is null
PASS /<2F>|<7C>|𐰁/u.exec("𐰁").toString() is "𐰁"
PASS /<2F>|𐰁|<7C>/u.exec("𐰁").toString() is "𐰁"
PASS /<2F>|<7C>|𐰁/u.exec("<22>").toString() is "<22>"
PASS /<2F>|𐰁|<7C>/u.exec("<22>").toString() is "<22>"
PASS /<2F>𐰁/u.exec("𐰁") is null
PASS /<2F>𐰁/u.exec("<22>") is null
PASS "<22>𐰁".match(/<2F>𐰁/u)[0].length is 3
PASS /𝌆{2}/u.test("𝌆𝌆") is true
PASS /𝌆{2}/u.test("𝌆𝌆") is true
PASS "𐐅𐐅𐐅𐐅".match(/𐐅{3}/u)[0] is "𐐅𐐅𐐅"
PASS "𐐂𐐅𐐅𐐅".match(/𐐅{3}/u)[0] is "𐐅𐐅𐐅"
PASS "𐐁𐐁𐐀".match(/𐐁{1,3}/u)[0] is "𐐁𐐁"
PASS "𐐁𐐩".match(/𐐁{1,3}/iu)[0] is "𐐁𐐩"
PASS "𐐁𐐩𐐪𐐩".match(/𐐁{1,}/iu)[0] is "𐐁𐐩"
PASS "𐌑𐌑𐌑".match(/𐌑*a|𐌑*./u)[0] is "𐌑𐌑𐌑"
PASS "a𐌑𐌑".match(/a𐌑*?$/u)[0] is "a𐌑𐌑"
PASS "a𐌑𐌑𐌑c".match(/a𐌑*cd|a𐌑*c/u)[0] is "a𐌑𐌑𐌑c"
PASS "a𐌑𐌑𐌑c".match(/a𐌑+cd|a𐌑+c/u)[0] is "a𐌑𐌑𐌑c"
PASS "𐌑𐌑𐌑".match(/𐌑+?a|𐌑+?./u)[0] is "𐌑𐌑"
PASS "𐌑𐌑𐌑".match(/𐌑+?a|𐌑+?$/u)[0] is "𐌑𐌑𐌑"
PASS "a𐌑𐌑𐌑c".match(/a𐌑*?cd|a𐌑*?c/u)[0] is "a𐌑𐌑𐌑c"
PASS "a𐌑𐌑𐌑c".match(/a𐌑+?cd|a𐌑+?c/u)[0] is "a𐌑𐌑𐌑c"
PASS "𐌑𐌑𐌑".match(/𐌑+?a|𐌑+?./iu)[0] is "𐌑𐌑"
PASS "𐐪𐐪𐌑".match(/𐐂*𐈀|𐐂*𐌑/iu)[0] is "𐐪𐐪𐌑"
PASS "𐐪𐐪𐌑".match(/𐐂+𐈀|𐐂+𐌑/iu)[0] is "𐐪𐐪𐌑"
PASS "𐐪𐐪𐌑".match(/𐐂*?𐈀|𐐂*?𐌑/iu)[0] is "𐐪𐐪𐌑"
PASS "𐐪𐐪𐌑".match(/𐐂+?𐈀|𐐂+?𐌑/iu)[0] is "𐐪𐐪𐌑"
PASS "ab𐌑c𐨁".match(/abc|ab𐌑cd|ab𐌑c𐨁d|ab𐌑c𐨁/u)[0] is "ab𐌑c𐨁"
PASS "ab𐐨c𐨁".match(/abc|ab𐐀cd|ab𐐀c𐨁d|ab𐐀c𐨁/iu)[0] is "ab𐐨c𐨁"
PASS /abc|ab𐐀cd|ab𐐀c𐨁d|ab𐐀c𐨁/iu.test("qwerty123") is false
PASS "a𐐨𐐨𐐨c".match(/ac|a𐐀*cd|a𐐀+cd|a𐐀+c/iu)[0] is "a𐐨𐐨𐐨c"
PASS "ab𐐨𐐨𐐨c𐨁".match(/abc|ab𐐀*cd|ab𐐀+c𐨁d|ab𐐀+c𐨁/iu)[0] is "ab𐐨𐐨𐐨c𐨁"
PASS "ab𐐨𐐨𐐨".match(/abc|ab𐐨*./u)[0] is "ab𐐨𐐨𐐨"
PASS "ab𐐨𐐨𐐨".match(/abc|ab𐐀*./iu)[0] is "ab𐐨𐐨𐐨"
PASS "𐐀".match(/a*/u)[0].length is 0
PASS "𐐀".match(/a*/ui)[0].length is 0
PASS "𐐀".match(/\d*/u)[0].length is 0
PASS "123𐐀".match(/\d*/u)[0] is "123"
PASS "12X3𐐀4".match(/\d{0,1}/ug) is ["1", "2", "", "3", "", "4", ""]
PASS "𐐂𐐅𐐅𐐂𐐅𐐅𐐅".match(/𐐅{3}/u)[0] is "𐐅𐐅𐐅"
PASS "a𐐐𐐐b".match(/a(𐐐*?)bc|a(𐐐*?)b/ui)[0] is "a𐐐𐐐b"
PASS match3[0] is "a𐐐𐐐b"
PASS match3[1] is undefined.
PASS match3[2] is "a𐐐𐐐b"
PASS match4[0] is "a𐐸𐐸b"
PASS match4[1] is undefined.
PASS match4[2] is "𐐸𐐸"
PASS match5[0] is "a𐐒𐐒b𐐒𐐒"
PASS match5[1] is undefined.
PASS match5[2] is "𐐒𐐒"
PASS match6[0] is "a𐐒𐐒b𐐺𐐒"
PASS match6[1] is undefined.
PASS match6[2] is "𐐒𐐒"
PASS /ſtop/ui.test("stop") is true
PASS /stop/ui.test("ſtop") is true
PASS /Kelvin/ui.test("kelvin") is true
PASS /KELVIN/ui.test("Kelvin") is true
PASS /\u{1}/.test("u") is true
PASS /\u{4}/.test("u") is false
PASS /\u{4}/.test("uuuu") is true
PASS "800-555-1212".match(/[0-9\-]*/u)[0].length is 12
PASS "🂡🃑🂸🃉🃚".match(re7)[0] is "🂡🃑"
PASS "🂡🃑🂱🃉🃚".match(re7)[0] is "🂡🃑🂱"
PASS "🂡🃑🂱🃁🃚".match(re7)[0] is "🂡🃑🂱🃁"
PASS "🂣🃑🂱🃁🃚".match(re7)[0] is "🃑🂱🃁"
PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]*a|[𐌐𐌑]*./iu)[0] is "𐌑𐌐𐌑"
PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]*?a|[𐌐𐌑]*?./iu)[0] is "𐌑"
PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+a|[𐌐𐌑]+./iu)[0] is "𐌑𐌐𐌑"
PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+?a|[𐌐𐌑]+?./iu)[0] is "𐌑𐌐"
PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+?a$|[𐌐𐌑]+?.$/iu)[0] is "𐌑𐌐𐌑"
PASS "𐌑𐌐𐌑".match(/[𐌁𐌑x]+a|[𐌐𐌑x]+./iu)[0] is "𐌑𐌐𐌑"
PASS "𐌑𐌐𐌑".match(/[𐌁𐌑x]+?a|[𐌐𐌑x]+?./iu)[0] is "𐌑𐌐"
PASS "C83|НАЧАТЬ".match(re8)[0] is "C83|НАЧАТЬ"
PASS "This.Is.16.Chars|НАЧАТЬ".match(re8)[0] is "This.Is.16.Chars|НАЧАТЬ"
PASS "Testing\nሴ 1 2 3".match(/^[က-𐃿] 1 2 3/um)[0] is "ሴ 1 2 3"
PASS "Testing\n𐃰 1 2 3".match(/^[က-𐃿] 1 2 3/um)[0] is "𐃰 1 2 3"
PASS "g\nሴ 1 2 3".match(/g\n^[က-𐃿] 1 2 3/um)[0] is "g\nሴ 1 2 3"
PASS "g\n𐃰 1 2 3".match(/g\n^[က-𐃿] 1 2 3/um)[0] is "g\n𐃰 1 2 3"
PASS "Testing ሴ\n1 2 3".match(/Testing [က-𐃿]$/um)[0] is "Testing ሴ"
PASS "Testing 𐃰\n1 2 3".match(/Testing [က-𐃿]$/um)[0] is "Testing 𐃰"
PASS "Testing ሴ\n1 2 3".match(/g [က-𐃿]$\n1/um)[0] is "g ሴ\n1"
PASS "Testing 𐃰\n1 2 3".match(/g [က-𐃿]$\n1/um)[0] is "g 𐃰\n1"
PASS "this is ba test".match(/is b\cha test/u)[0].length is 11
PASS new RegExp("\\/", "u").source is "\\/"
PASS r = new RegExp("\\u{110000}", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape.
PASS r = new RegExp("𐐅{2147483648}", "u") threw exception SyntaxError: Invalid regular expression: pattern exceeds string length limits.
PASS /{/u threw exception SyntaxError: Invalid regular expression: incomplete {} quantifier for Unicode pattern.
PASS /[a-\d]/u threw exception SyntaxError: Invalid regular expression: invalid range in character class for Unicode pattern.
PASS /]/u threw exception SyntaxError: Invalid regular expression: unmatched ] or } bracket for Unicode pattern.
PASS /\5/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\01/u threw exception SyntaxError: Invalid regular expression: invalid octal escape for Unicode pattern.
PASS /[\23]/u threw exception SyntaxError: Invalid regular expression: invalid octal escape for Unicode pattern.
PASS /\c9/u threw exception SyntaxError: Invalid regular expression: invalid \c escape for Unicode pattern.
PASS r = new RegExp("\\-", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
PASS r = new RegExp("\\a", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
PASS r = new RegExp("[\\a]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
PASS r = new RegExp("[\\B]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
PASS r = new RegExp("\\x", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
PASS r = new RegExp("[\\x]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
PASS r = new RegExp("\\u", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode \u escape.
PASS r = new RegExp("[\\u]", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode \u escape.
PASS r = new RegExp("\\u{", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape.
PASS r = new RegExp("\\u{\udead", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape.
PASS /\1/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\2/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\3/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\4/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\5/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\6/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\7/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\8/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\9/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /(.)\1/u did not throw exception.
PASS /(.)(.)\2/u did not throw exception.
PASS /(.)(.)\3/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
PASS /\1/ did not throw exception.
PASS /\2/ did not throw exception.
PASS /\3/ did not throw exception.
PASS /\4/ did not throw exception.
PASS /\5/ did not throw exception.
PASS /\6/ did not throw exception.
PASS /\7/ did not throw exception.
PASS /\8/ did not throw exception.
PASS /\9/ did not throw exception.
PASS successfullyParsed is true

TEST COMPLETE
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know when generating a fixed character atom that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the 'u' flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								Test for unicode regular expression processing
 								On success, you will see a series of "PASS" messages, followed by "TEST COMPLETE".
-												[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns).  In the spec, the
grammar is annotated with [U] annotations.  Productions that are prefixed with
[+U] are only available with the Unicode flags while productions prefixed with
[~U] are only available without the Unicode flag.

Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
parsing errors at syntax checking time.  Restricted what escapes are available for
non Unicode patterns.  Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass only escape in Unicode patterns.

Updated the tests for these changes.

Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
change set r197426 was landed.

* parser/ASTBuilder.h:
(JSC::ASTBuilder::createRegExp):
* parser/Parser.cpp:
(JSC::Parser<LexerType>::parsePrimaryExpression):
* parser/SyntaxChecker.h:
(JSC::SyntaxChecker::createRegExp):
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):
* yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
(shouldThrowInvalidEscape):


[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

* js/Object-getOwnPropertyNames-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:


Canonical link: https://commits.webkit.org/173079@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197534 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-04 01:24:28 +00:00
+								PASS "a".match(/a/u)[0].length is 1
 								PASS "a".match(/A/ui)[0].length is 1
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know when generating a fixed character atom that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the 'u' flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS "a".match(/a/u)[0].length is 1
 								PASS "a".match(/A/iu)[0].length is 1
 								PASS "Ȓ".match(/Ȓ/u)[0].length is 1
-												[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns).  In the spec, the
grammar is annotated with [U] annotations.  Productions that are prefixed with
[+U] are only available with the Unicode flags while productions prefixed with
[~U] are only available without the Unicode flag.

Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
parsing errors at syntax checking time.  Restricted what escapes are available for
non Unicode patterns.  Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass only escape in Unicode patterns.

Updated the tests for these changes.

Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
change set r197426 was landed.

* parser/ASTBuilder.h:
(JSC::ASTBuilder::createRegExp):
* parser/Parser.cpp:
(JSC::Parser<LexerType>::parsePrimaryExpression):
* parser/SyntaxChecker.h:
(JSC::SyntaxChecker::createRegExp):
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):
* yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
(shouldThrowInvalidEscape):


[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

* js/Object-getOwnPropertyNames-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:


Canonical link: https://commits.webkit.org/173079@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197534 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-04 01:24:28 +00:00
+								PASS "Ȓ".match(/Ȓ/u)[0].length is 1
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know when generating a fixed character atom that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the u flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS "ሴ".match(/ሴ/u)[0].length is 1
-												[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns).  In the spec, the
grammar is annotated with [U] annotations.  Productions that are prefixed with
[+U] are only available with the Unicode flag while productions prefixed with
[~U] are only available without the Unicode flag.

Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
parsing errors at syntax checking time.  Restricted what escapes are available for
non Unicode patterns.  Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass only escape in Unicode patterns.

Updated the tests for these changes.

Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
change set r197426 was landed.

* parser/ASTBuilder.h:
(JSC::ASTBuilder::createRegExp):
* parser/Parser.cpp:
(JSC::Parser<LexerType>::parsePrimaryExpression):
* parser/SyntaxChecker.h:
(JSC::SyntaxChecker::createRegExp):
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):
* yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
(shouldThrowInvalidEscape):


[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

* js/Object-getOwnPropertyNames-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:


Canonical link: https://commits.webkit.org/173079@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197534 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-04 01:24:28 +00:00
+								PASS "ሴ".match(/ሴ/u)[0].length is 1
 								PASS "⪼".match(/⪼/u)[0].length is 1
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know when generating a fixed character atom that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the u flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS "㿭".match(/㿭/u)[0].length is 1
 								PASS "𒍅".match(/𒍅/u)[0].length is 2
 								PASS "𒍅".match(/𒍅/u)[0].length is 2
-												[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns).  In the spec, the
grammar is annotated with [U] annotations.  Productions that are prefixed with
[+U] are only available with the Unicode flag while productions prefixed with
[~U] are only available without the Unicode flag.

Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
parsing errors at syntax checking time.  Restricted what escapes are available for
non Unicode patterns.  Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass only escape in Unicode patterns.

Updated the tests for these changes.

Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
change set r197426 was landed.

* parser/ASTBuilder.h:
(JSC::ASTBuilder::createRegExp):
* parser/Parser.cpp:
(JSC::Parser<LexerType>::parsePrimaryExpression):
* parser/SyntaxChecker.h:
(JSC::SyntaxChecker::createRegExp):
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):
* yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
(shouldThrowInvalidEscape):


[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

* js/Object-getOwnPropertyNames-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:


Canonical link: https://commits.webkit.org/173079@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197534 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-04 01:24:28 +00:00
+								PASS "𝌆".match(/𝌆/u)[0].length is 2
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know when generating a fixed character atom that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the u flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS /𐑏/u.test("𐑏") is true
 								PASS /𐑏/u.test("𐑏") is true
 								PASS "𝌆".match(/𝌆/u)[0].length is 2
 								PASS /(𐀀|𐐀|𐐩)/u.test("𐐀") is true
 								PASS "𐄣".match(/a|𐄣|b/u)[0].length is 2
 								PASS "b".match(/a|𐄣|b/u)[0].length is 1
 								PASS /(?:a|𐄣|b)x/u.test("𐄣") is false
 								PASS /(?:a|𐄣|b)x/u.test("𐄣x") is true
 								PASS /(?:a|𐄣|b)x/u.test("b") is false
 								PASS /(?:a|𐄣|b)x/u.test("bx") is true
 								PASS "a𐄣x".match(/a𐄣b|a𐄣x/u)[0].length is 4
 								PASS /(𐀀|𐐀|𐐩)x/ui.test("𐐀x") is true
 								PASS /(𐀀|𐐀|𐐩)x/ui.test("𐐩x") is true
 								PASS /(𐀀|𐐀|𐐩)x/ui.test("𐐁x") is true
 								PASS /(𐀀|𐐀|𐐩)x/ui.test("𐐨x") is true
 								PASS "𐐩".match(/a|𐐁|b/iu)[0].length is 2
 								PASS "B".match(/a|𐄣|b/iu)[0].length is 1
 								PASS /(?:A|𐄣|b)x/iu.test("𐄣") is false
 								PASS /(?:A|𐄣|b)x/iu.test("𐄣x") is true
 								PASS /(?:A|𐄣|b)x/iu.test("b") is false
 								PASS /(?:A|𐄣|b)x/iu.test("bx") is true
 								PASS "a𐄣X".match(/a𐄣b|a𐄣x/iu)[0].length is 4
 								PASS "Ťx".match(/ťx/iu)[0].length is 2
-												Some tests fail with ES6 `u` (Unicode) flag for regular expressions
https://bugs.webkit.org/show_bug.cgi?id=151597

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

Added two new tables to handle the anomalies of \w and \W CharacterClassEscapes
when specified in RegExp's with both the unicode and ignoreCase flags.  Given the
case folding rules described in the standard via the meta function Canonicalize(),
which allow cross ASCII case folding when unicode is specified, the unicode characters
\u017f (Latin small letter long s) and \u212a (kelvin symbol) are part of the \w (word) characterClassEscape.
This is true because they case fold to 's' and 'k' respectively.  Because they case fold
to lower case letters, the corresponding letters, 'k', 'K', 's' and 'S', are also matched with
\W with the unicode and ignoreCase flags.

* create_regex_tables:
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::YarrPattern::wordcharCharacterClass):
(JSC::Yarr::YarrPattern::wordUnicodeIgnoreCaseCharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordUnicodeIgnoreCaseCharCharacterClass):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/174667@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@199523 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-04-14 00:47:40 +00:00
+								PASS /\w/iu.test("ſ") is true
 								PASS /\w/iu.test("K") is true
-												ES6 Change: Unify handling of RegExp CharacterClassEscapes \w and \W and Word Asserts \b and \B
https://bugs.webkit.org/show_bug.cgi?id=158505

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

This change makes it so that the CharacterClassEscape \w matches the inverse of
\W and vice versa for unicode, ignore case RegExp's.

Before this change, both /\w/ui and /\W/ui RegExp's would match the characters
k, K, s, S, \u017f (Latin Small Letter Long S) and \u212a (Kelvin Sign).
This was due to how the ES6 standard defined matching of character classes
specifically that the abstract operation "Canonicalize()" is called for the
character to be matched AND for the characters in the character class we are
matching against.  This change is to make \W always be the inverse of \w.
It is still the case that the characters that match against \w change
depending on a regular expression's flags.

The only real changes occur for regular expressions with both the unicode and
ignore case flags set.  Updated the character class generator to make
nonwordUnicodeIgnoreCaseChar not include k, K, s, S, \u017f and \u212a.
Changed BytecodePattern.wordcharCharacterClass to use the correct
word character class for the flags.  Simplified character class set up in
the pattern to use m_pattern.wordUnicodeIgnoreCaseCharCharacterClass and
invert as appropriate when unicode and ignore case are both set.

* create_regex_tables:
* yarr/YarrInterpreter.h:
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):

LayoutTests:

Updated and added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/177243@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@202490 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-06-27 17:38:55 +00:00
+								PASS /\W/iu.test("ſ") is false
 								PASS /\W/iu.test("K") is false
-												Some tests fail with ES6 `u` (Unicode) flag for regular expressions
https://bugs.webkit.org/show_bug.cgi?id=151597

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

Added two new tables to handle the anomalies of \w and \W CharacterClassEscapes
when specified in RegExp's with both the unicode and ignoreCase flags.  Given the
case folding rules described in the standard via the meta function Canonicalize(),
which allow cross ASCII case folding when unicode is specified, the unicode characters
\u017f (Latin small letter long s) and \u212a (kelvin symbol) are part of the \w (word) characterClassEscape.
This is true because they case fold to 's' and 'k' respectively.  Because they case fold
to lower case letters, the corresponding letters, 'k', 'K', 's' and 'S', are also matched with
\W with the unicode and ignoreCase flags.

* create_regex_tables:
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::YarrPattern::wordcharCharacterClass):
(JSC::Yarr::YarrPattern::wordUnicodeIgnoreCaseCharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordUnicodeIgnoreCaseCharCharacterClass):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/174667@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@199523 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-04-14 00:47:40 +00:00
+								PASS /[\w\d]/iu.test("ſ") is true
 								PASS /[\w\d]/iu.test("K") is true
 								PASS /[^\w\d]/iu.test("ſ") is false
 								PASS /[^\w\d]/iu.test("K") is false
-												ES6 Change: Unify handling of RegExp CharacterClassEscapes \w and \W and Word Asserts \b and \B
https://bugs.webkit.org/show_bug.cgi?id=158505

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

This change makes it so that the CharacterClassEscape \w matches the inverse of
\W and vice versa for unicode, ignore case RegExp's.

Before this change, both /\w/ui and /\W/ui RegExp's would match the characters
k, K, s, S, \u017f (Latin Small Letter Long S) and \u212a (Kelvin Sign).
This was due to how the ES6 standard defined matching of character classes
specifically that the abstract operation "Canonicalize()" is called for the
character to be matched AND for the characters in the character class we are
matching against.  This change is to make \W always be the inverse of \w.
It is still the case that the characters that match against \w change
depending on a regular expression's flags.

The only real changes occur for regular expressions with both the unicode and
ignore case flags set.  Updated the character class generator to make
nonwordUnicodeIgnoreCaseChar not include k, K, s, S, \u017f and \u212a.
Changed BytecodePattern.wordcharCharacterClass to use the correct
word character class for the flags.  Simplified character class set up in
the pattern to use m_pattern.wordUnicodeIgnoreCaseCharCharacterClass and
invert as appropriate when unicode and ignore case are both set.

* create_regex_tables:
* yarr/YarrInterpreter.h:
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):

LayoutTests:

Updated and added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/177243@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@202490 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-06-27 17:38:55 +00:00
+								PASS /[\W\d]/iu.test("ſ") is false
 								PASS /[\W\d]/iu.test("K") is false
 								PASS /[^\W\d]/iu.test("ſ") is true
 								PASS /[^\W\d]/iu.test("K") is true
-												Some tests fail with ES6 `u` (Unicode) flag for regular expressions
https://bugs.webkit.org/show_bug.cgi?id=151597

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

Added two new tables to handle the anomalies of \w and \W CharacterClassEscapes
when specified in RegExp's with both the unicode and ignoreCase flags.  Given the
case folding rules described in the standard via the meta function Canonicalize(),
which allow cross ASCII case folding when unicode is specified, the unicode characters
\u017f (Latin small letter long s) and \u212a (kelvin symbol) are part of the \w (word) characterClassEscape.
This is true because they case fold to 's' and 'k' respectively.  Because they case fold
to lower case letters, the corresponding letters, 'k', 'K', 's' and 'S', are also matched with
\W with the unicode and ignoreCase flags.

* create_regex_tables:
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::YarrPattern::wordcharCharacterClass):
(JSC::Yarr::YarrPattern::wordUnicodeIgnoreCaseCharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordUnicodeIgnoreCaseCharCharacterClass):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/174667@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@199523 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-04-14 00:47:40 +00:00
+								PASS /\w/iu.test("S") is true
 								PASS /\w/iu.test("K") is true
-												ES6 Change: Unify handling of RegExp CharacterClassEscapes \w and \W and Word Asserts \b and \B
https://bugs.webkit.org/show_bug.cgi?id=158505

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

This change makes it so that the CharacterClassEscape \w matches the inverse of
\W and vice versa for unicode, ignore case RegExp's.

Before this change, both /\w/ui and /\W/ui RegExp's would match the characters
k, K, s, S, \u017f (Latin Small Letter Long S) and \u212a (Kelvin Sign).
This was due to how the ES6 standard defined matching of character classes
specifically that the abstract operation "Canonicalize()" is called for the
character to be matched AND for the characters in the character class we are
matching against.  This change is to make \W always be the inverse of \w.
It is still the case that the characters that match against \w change
depending on a regular expression's flags.

The only real changes occur for regular expressions with both the unicode and
ignore case flags set.  Updated the character class generator to make
nonwordUnicodeIgnoreCaseChar not include k, K, s, S, \u017f and \u212a.
Changed BytecodePattern.wordcharCharacterClass to use the correct
word character class for the flags.  Simplified character class set up in
the pattern to use m_pattern.wordUnicodeIgnoreCaseCharCharacterClass and
invert as appropriate when unicode and ignore case are both set.

* create_regex_tables:
* yarr/YarrInterpreter.h:
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):

LayoutTests:

Updated and added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/177243@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@202490 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-06-27 17:38:55 +00:00
+								PASS /\W/iu.test("S") is false
 								PASS /\W/iu.test("K") is false
-												Some tests fail with ES6 `u` (Unicode) flag for regular expressions
https://bugs.webkit.org/show_bug.cgi?id=151597

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

Added two new tables to handle the anomalies of \w and \W CharacterClassEscapes
when specified in RegExp's with both the unicode and ignoreCase flags.  Given the
case folding rules described in the standard via the meta function Canonicalize(),
which allow cross ASCII case folding when unicode is specified, the unicode characters
\u017f (Latin small letter long s) and \u212a (kelvin symbol) are part of the \w (word) characterClassEscape.
This is true because they case fold to 's' and 'k' respectively.  Because they case fold
to lower case letters, the corresponding letters, 'k', 'K', 's' and 'S', are also matched with
\W with the unicode and ignoreCase flags.

* create_regex_tables:
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::YarrPattern::wordcharCharacterClass):
(JSC::Yarr::YarrPattern::wordUnicodeIgnoreCaseCharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
(JSC::Yarr::YarrPattern::nonwordUnicodeIgnoreCaseCharCharacterClass):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/174667@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@199523 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-04-14 00:47:40 +00:00
+								PASS /[\w\d]/iu.test("S") is true
 								PASS /[\w\d]/iu.test("K") is true
 								PASS /[^\w\d]/iu.test("S") is false
 								PASS /[^\w\d]/iu.test("K") is false
-												ES6 Change: Unify handling of RegExp CharacterClassEscapes \w and \W and Word Asserts \b and \B
https://bugs.webkit.org/show_bug.cgi?id=158505

Reviewed by Geoffrey Garen.

Source/JavaScriptCore:

This change makes it so that the CharacterClassEscape \w matches the inverse of
\W and vice versa for unicode, ignore case RegExp's.

Before this change, both /\w/ui and /\W/ui RegExp's would match the characters
k, K, s, S, \u017f (Latin Small Letter Long S) and \u212a (Kelvin Sign).
This was due to how the ES6 standard defined matching of character classes
specifically that the abstract operation "Canonicalize()" is called for the
character to be matched AND for the characters in the character class we are
matching against.  This change is to make \W always be the inverse of \w.
It is still the case that the characters that match against \w change
depending on a regular expression's flags.

The only real changes occur for regular expressions with both the unicode and
ignore case flags set.  Updated the character class generator to make
nonwordUnicodeIgnoreCaseChar not include k, K, s, S, \u017f and \u212a.
Changed BytecodePattern.wordcharCharacterClass to use the correct
word character class for the flags.  Simplified character class set up in
the pattern to use m_pattern.wordUnicodeIgnoreCaseCharCharacterClass and
invert as appropriate when unicode and ignore case are both set.

* create_regex_tables:
* yarr/YarrInterpreter.h:
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):

LayoutTests:

Updated and added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/177243@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@202490 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-06-27 17:38:55 +00:00
+								PASS /[\W\d]/iu.test("S") is false
 								PASS /[\W\d]/iu.test("K") is false
 								PASS /[^\W\d]/iu.test("S") is true
 								PASS /[^\W\d]/iu.test("K") is true
 								PASS "Grasſoden is old German for grass".match(/.*?\Bs\u017foden/iu)[0] is "Grasſoden"
 								PASS "Grasſoden is old German for grass".match(/.*?\B\u017foden/iu)[0] is "Grasſoden"
 								PASS "Grasſoden is old German for grass".match(/.*?\Boden/iu)[0] is "Grasſoden"
 								PASS "Grasſoden is old German for grass".match(/.*?\Bden/iu)[0] is "Grasſoden"
 								PASS "Water freezes at 273K which is 0C.".split(/\b\s/iu) is ["Water","freezes","at","273K","which","is","0C."]
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know, when generating a fixed character atom, that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the y flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS "𝌆".match(/^.$/u)[0].length is 2
 								PASS "It is 78°".match(/.*/u)[0].length is 9
-												[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns).  In the spec, the
grammar is annotated with [U] annotations.  Productions that are prefixed with
[+U] are only available with the Unicode flag while productions prefixed with
[~U] are only available without the Unicode flag.

Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
parsing errors at syntax checking time.  Restricted what escapes are available for
non Unicode patterns.  Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass only escape in Unicode patterns.

Updated the tests for these changes.

Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
change set r197426 was landed.

* parser/ASTBuilder.h:
(JSC::ASTBuilder::createRegExp):
* parser/Parser.cpp:
(JSC::Parser<LexerType>::parsePrimaryExpression):
* parser/SyntaxChecker.h:
(JSC::SyntaxChecker::createRegExp):
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):
* yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
(shouldThrowInvalidEscape):


[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

* js/Object-getOwnPropertyNames-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:


Canonical link: https://commits.webkit.org/173079@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197534 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-04 01:24:28 +00:00
+								PASS stringWithDanglingFirstSurrogate.match(/.*/u)[0].length is 3
 								PASS stringWithDanglingSecondSurrogate.match(/.*/u)[0].length is 3
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know, when generating a fixed character atom, that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the u flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS "𝌆".match(/[𝌆a]/)[0].length is 1
 								PASS "𝌆".match(/[a𝌆]/u)[0].length is 2
 								PASS "𝌆".match(/[𝌆a]/u)[0].length is 2
 								PASS "𝌆".match(/[a-𝌆]/)[0].length is 1
 								PASS "𝌆".match(/[a-𝌆]/u)[0].length is 2
 								PASS "X".match(/[ -𐑏]/u)[0].length is 1
 								PASS "က".match(/[ -𐑏]/u)[0].length is 1
 								PASS "𐐧".match(/[ -𐑏]/u)[0].length is 2
 								PASS re1.test("Z") is false
 								PASS re1.test("က") is false
 								PASS re1.test("𐐀") is false
 								PASS re2.test("A") is true
 								PASS re2.test("") is false
 								PASS re2.test("𒍅") is true
-												JavaScript string corruption using RegExp with unicode character
https://bugs.webkit.org/show_bug.cgi?id=187947

Reviewed by Yusuke Suzuki.

JSTests:

This change adds a regression test for string corruption that occurred after
a non-BMP character was removed by String.prototype.replace.
The issue was fixed in https://trac.webkit.org/changeset/253648/webkit.

Also, this patch brings back:
1. An out-of-order character class range test.
2. Dangling and combined surrogates tests (as webkit.org/b/154863 is now resolved).

* stress/regress-187947.js: Added.

LayoutTests:

Besides adding a regression test, this patch brings back:
1. An out-of-order character class range test.
2. Dangling and combined surrogates tests (as webkit.org/b/154863 is now resolved).

* js/dom/regexp-range-out-of-order-expected.txt:
* js/dom/script-tests/regexp-range-out-of-order.js:
* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-extended-characters-match.js:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/220335@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@255975 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-02-06 21:36:48 +00:00
+								PASS /[𐰁<>#<23>]/u.exec("𐰁").toString() is "𐰁"
 								PASS /[<5B>𐰁<EFBFBD>]/u.exec("𐰁").toString() is "𐰁"
 								PASS /[<5B>#<23>𐰁]/u.exec("𐰁").toString() is "𐰁"
 								PASS /[<5B>𐰁<EFBFBD>]/u.exec("𐰁").toString() is "𐰁"
 								PASS /[𐰁<>#<23>]{2}/u.exec("𐰁") is null
 								PASS /[<5B>𐰁<EFBFBD>]{2}/u.exec("𐰁") is null
 								PASS /[<5B>#<23>𐰁]{2}/u.exec("𐰁") is null
 								PASS /[<5B>𐰁<EFBFBD>]{2}/u.exec("𐰁") is null
 								PASS /<2F>|<7C>|𐰁/u.exec("𐰁").toString() is "𐰁"
 								PASS /<2F>|𐰁|<7C>/u.exec("𐰁").toString() is "𐰁"
 								PASS /<2F>|<7C>|𐰁/u.exec("<22>").toString() is "<22>"
 								PASS /<2F>|𐰁|<7C>/u.exec("<22>").toString() is "<22>"
 								PASS /<2F>𐰁/u.exec("𐰁") is null
 								PASS /<2F>𐰁/u.exec("<22>") is null
 								PASS "<22>𐰁".match(/<2F>𐰁/u)[0].length is 3
-												[ES6] Quantified unicode regular expressions do not work for counts greater than 1
https://bugs.webkit.org/show_bug.cgi?id=156044

Reviewed by Mark Lam.

Source/JavaScriptCore:

Fixed incorrect indexing of non-BMP characters in fixed patterns.  The old code
was indexing by character units, a single JS character, instead of code points
which is 2 JS characters.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::matchDisjunction):

LayoutTests:

Added new test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/174160@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@198866 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-31 00:38:20 +00:00
+								PASS /𝌆{2}/u.test("𝌆𝌆") is true
 								PASS /𝌆{2}/u.test("𝌆𝌆") is true
-												Implement Unicode RegExp support in the YARR JIT
https://bugs.webkit.org/show_bug.cgi?id=174646

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

This support is only implemented for 64 bit platforms.  It wouldn't be too hard to add support
for 32 bit platforms with a reasonable number of spare registers.  This code slightly refactors
register usage to reduce the number of callee save registers used for non-Unicode expressions.
For Unicode expressions, there are several more registers used to store constant values for
processing surrogate pairs as well as discerning whether a character belongs to the Basic
Multilingual Plane (BMP) or one of the Supplemental Planes.

This implements JIT support for Unicode expressions very similar to how the interpreter works.
Just like in the interpreter, backtracking code uses more space on the stack to save positions.
Moved the BackTrackInfo* structs to YarrPattern as separate functions.  Added xxxIndex()
functions to each of these to simplify how the JIT code reads and writes the structure fields.

Given that reading surrogate pairs and transforming them into a single code point takes a
little processing, the code that implements reading a Unicode character is implemented as a
leaf function added to the end of the JIT'ed code.  The calling convention for
"tryReadUnicodeCharacterHelper()" is non-standard given that the rest of the code assumes
that argument values stay in argument registers for most of the generated code.
That helper takes the starting character address in one register, regUnicodeInputAndTrail,
and uses another dedicated temporary register, regUnicodeTemp.  The result is typically
returned in regT0.  If another return register is requested, we'll create an inline copy of
that function.

Added a new flag to CharacterClass to signify if a class has non-BMP characters.  This flag
is used in optimizeAlternative() where we swap the order of a fixed character class term with
a fixed character term that immediately follows it.  Since the non-BMP character class may
increment "index" when matching, that must be done first before trying to match a fixed
character term later in the string.

Given the usefulness of the LEA instruction on X86 to create a single pointer value from a
base with index and offset, which the YARR JIT uses heavily, I added a new macroAssembler
function, getEffectiveAddress64(), with an ARM64 implementation.  It just calls x86Lea64()
on X86-64.  Also added an ImplicitAddress version of load16Unaligned().

(JSC::MacroAssemblerARM64::load16Unaligned):
(JSC::MacroAssemblerARM64::getEffectiveAddress64):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::load16Unaligned):
(JSC::MacroAssemblerX86Common::load16):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::getEffectiveAddress64):
* create_regex_tables:
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
* yarr/YarrInterpreter.cpp:
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
(JSC::Yarr::YarrGenerator::tryReadUnicodeChar):
(JSC::Yarr::YarrGenerator::readCharacter):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::matchAssertionWordchar):
(JSC::Yarr::YarrGenerator::generateAssertionWordBoundary):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::generatePatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassOnce):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::generateTryReadUnicodeCharacterHelper):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
* yarr/YarrJIT.h:
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::reset):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::CharacterClassConstructor::hasNonBMPCharacters):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::BackTrackInfoPatternCharacter::beginIndex):
(JSC::Yarr::BackTrackInfoPatternCharacter::matchAmountIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::beginIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::matchAmountIndex):
(JSC::Yarr::BackTrackInfoBackReference::beginIndex):
(JSC::Yarr::BackTrackInfoBackReference::matchAmountIndex):
(JSC::Yarr::BackTrackInfoAlternative::offsetIndex):
(JSC::Yarr::BackTrackInfoParentheticalAssertion::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesOnce::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesTerminal::beginIndex):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/192507@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@221052 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2017-08-22 22:43:08 +00:00
+								PASS "𐐅𐐅𐐅𐐅".match(/𐐅{3}/u)[0] is "𐐅𐐅𐐅"
 								PASS "𐐂𐐅𐐅𐐅".match(/𐐅{3}/u)[0] is "𐐅𐐅𐐅"
-												[ES6] Quantified unicode regular expressions do not work for counts greater than 1
https://bugs.webkit.org/show_bug.cgi?id=156044

Reviewed by Mark Lam.

Source/JavaScriptCore:

Fixed incorrect indexing of non-BMP characters in fixed patterns.  The old code
was indexing by character units, a single JS character, instead of code points
which is 2 JS characters.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::matchDisjunction):

LayoutTests:

Added new test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/174160@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@198866 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-31 00:38:20 +00:00
+								PASS "𐐁𐐁𐐀".match(/𐐁{1,3}/u)[0] is "𐐁𐐁"
 								PASS "𐐁𐐩".match(/𐐁{1,3}/iu)[0] is "𐐁𐐩"
 								PASS "𐐁𐐩𐐪𐐩".match(/𐐁{1,}/iu)[0] is "𐐁𐐩"
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know when generating a fixed character atom that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the u flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS "𐌑𐌑𐌑".match(/𐌑*a|𐌑*./u)[0] is "𐌑𐌑𐌑"
 								PASS "a𐌑𐌑".match(/a𐌑*?$/u)[0] is "a𐌑𐌑"
 								PASS "a𐌑𐌑𐌑c".match(/a𐌑*cd|a𐌑*c/u)[0] is "a𐌑𐌑𐌑c"
 								PASS "a𐌑𐌑𐌑c".match(/a𐌑+cd|a𐌑+c/u)[0] is "a𐌑𐌑𐌑c"
 								PASS "𐌑𐌑𐌑".match(/𐌑+?a|𐌑+?./u)[0] is "𐌑𐌑"
 								PASS "𐌑𐌑𐌑".match(/𐌑+?a|𐌑+?$/u)[0] is "𐌑𐌑𐌑"
 								PASS "a𐌑𐌑𐌑c".match(/a𐌑*?cd|a𐌑*?c/u)[0] is "a𐌑𐌑𐌑c"
 								PASS "a𐌑𐌑𐌑c".match(/a𐌑+?cd|a𐌑+?c/u)[0] is "a𐌑𐌑𐌑c"
 								PASS "𐌑𐌑𐌑".match(/𐌑+?a|𐌑+?./iu)[0] is "𐌑𐌑"
 								PASS "𐐪𐐪𐌑".match(/𐐂*𐈀|𐐂*𐌑/iu)[0] is "𐐪𐐪𐌑"
 								PASS "𐐪𐐪𐌑".match(/𐐂+𐈀|𐐂+𐌑/iu)[0] is "𐐪𐐪𐌑"
 								PASS "𐐪𐐪𐌑".match(/𐐂*?𐈀|𐐂*?𐌑/iu)[0] is "𐐪𐐪𐌑"
 								PASS "𐐪𐐪𐌑".match(/𐐂+?𐈀|𐐂+?𐌑/iu)[0] is "𐐪𐐪𐌑"
 								PASS "ab𐌑c𐨁".match(/abc|ab𐌑cd|ab𐌑c𐨁d|ab𐌑c𐨁/u)[0] is "ab𐌑c𐨁"
 								PASS "ab𐐨c𐨁".match(/abc|ab𐐀cd|ab𐐀c𐨁d|ab𐐀c𐨁/iu)[0] is "ab𐐨c𐨁"
 								PASS /abc|ab𐐀cd|ab𐐀c𐨁d|ab𐐀c𐨁/iu.test("qwerty123") is false
 								PASS "a𐐨𐐨𐐨c".match(/ac|a𐐀*cd|a𐐀+cd|a𐐀+c/iu)[0] is "a𐐨𐐨𐐨c"
 								PASS "ab𐐨𐐨𐐨c𐨁".match(/abc|ab𐐀*cd|ab𐐀+c𐨁d|ab𐐀+c𐨁/iu)[0] is "ab𐐨𐐨𐐨c𐨁"
 								PASS "ab𐐨𐐨𐐨".match(/abc|ab𐐨*./u)[0] is "ab𐐨𐐨𐐨"
 								PASS "ab𐐨𐐨𐐨".match(/abc|ab𐐀*./iu)[0] is "ab𐐨𐐨𐐨"
-												[ES6] Greedy unicode RegExp's don't properly backtrack past non BMP characters
https://bugs.webkit.org/show_bug.cgi?id=155829

Reviewed by Saam Barati.

Source/JavaScriptCore:

When we back up when matching part of a unicode pattern, we can't just back up one character.
Instead we need to save our start position before trying to match a character and
restore the position if the match fails.  This was done in other places, but wasn't
done for all greedy types.

Fixed matchGlobal() to properly handle advancing past non BMP characters.

* runtime/RegExpObject.cpp:
(JSC::RegExpObject::matchGlobal):
* runtime/RegExpObjectInlines.h:
(JSC::RegExpObject::advanceStringUnicode):
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::matchDisjunction):

LayoutTests:

Added new test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/173939@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@198624 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-24 14:19:37 +00:00
+								PASS "𐐀".match(/a*/u)[0].length is 0
 								PASS "𐐀".match(/a*/ui)[0].length is 0
 								PASS "𐐀".match(/\d*/u)[0].length is 0
 								PASS "123𐐀".match(/\d*/u)[0] is "123"
 								PASS "12X3𐐀4".match(/\d{0,1}/ug) is ["1", "2", "", "3", "", "4", ""]
-												Implement Unicode RegExp support in the YARR JIT
https://bugs.webkit.org/show_bug.cgi?id=174646

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

This support is only implemented for 64 bit platforms.  It wouldn't be too hard to add support
for 32 bit platforms with a reasonable number of spare registers.  This code slightly refactors
register usage to reduce the number of callee save registers used for non-Unicode expressions.
For Unicode expressions, there are several more registers used to store constant values for
processing surrogate pairs as well as discerning whether a character belongs to the Basic
Multilingual Plane (BMP) or one of the Supplemental Planes.

This implements JIT support for Unicode expressions very similar to how the interpreter works.
Just like in the interpreter, backtracking code uses more space on the stack to save positions.
Moved the BackTrackInfo* structs to YarrPattern as separate functions.  Added xxxIndex()
functions to each of these to simplify how the JIT code reads and writes the structure fields.

Given that reading surrogate pairs and transforming them into a single code point takes a
little processing, the code that implements reading a Unicode character is implemented as a
leaf function added to the end of the JIT'ed code.  The calling convention for
"tryReadUnicodeCharacterHelper()" is non-standard given that the rest of the code assumes
that argument values stay in argument registers for most of the generated code.
That helper takes the starting character address in one register, regUnicodeInputAndTrail,
and uses another dedicated temporary register, regUnicodeTemp.  The result is typically
returned in regT0.  If another return register is requested, we'll create an inline copy of
that function.

Added a new flag to CharacterClass to signify if a class has non-BMP characters.  This flag
is used in optimizeAlternative() where we swap the order of a fixed character class term with
a fixed character term that immediately follows it.  Since the non-BMP character class may
increment "index" when matching, that must be done first before trying to match a fixed
character term later in the string.

Given the usefulness of the LEA instruction on X86 to create a single pointer value from a
base with index and offset, which the YARR JIT uses heavily, I added a new macroAssembler
function, getEffectiveAddress64(), with an ARM64 implementation.  It just calls x86Lea64()
on X86-64.  Also added an ImplicitAddress version of load16Unaligned().

(JSC::MacroAssemblerARM64::load16Unaligned):
(JSC::MacroAssemblerARM64::getEffectiveAddress64):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::load16Unaligned):
(JSC::MacroAssemblerX86Common::load16):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::getEffectiveAddress64):
* create_regex_tables:
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
* yarr/YarrInterpreter.cpp:
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
(JSC::Yarr::YarrGenerator::tryReadUnicodeChar):
(JSC::Yarr::YarrGenerator::readCharacter):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::matchAssertionWordchar):
(JSC::Yarr::YarrGenerator::generateAssertionWordBoundary):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::generatePatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassOnce):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::generateTryReadUnicodeCharacterHelper):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
* yarr/YarrJIT.h:
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::reset):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::CharacterClassConstructor::hasNonBMPCharacters):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::BackTrackInfoPatternCharacter::beginIndex):
(JSC::Yarr::BackTrackInfoPatternCharacter::matchAmountIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::beginIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::matchAmountIndex):
(JSC::Yarr::BackTrackInfoBackReference::beginIndex):
(JSC::Yarr::BackTrackInfoBackReference::matchAmountIndex):
(JSC::Yarr::BackTrackInfoAlternative::offsetIndex):
(JSC::Yarr::BackTrackInfoParentheticalAssertion::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesOnce::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesTerminal::beginIndex):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/192507@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@221052 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2017-08-22 22:43:08 +00:00
+								PASS "𐐂𐐅𐐅𐐂𐐅𐐅𐐅".match(/𐐅{3}/u)[0] is "𐐅𐐅𐐅"
-												REGRESSION (r221052): DumpRenderTree crashed in com.apple.JavaScriptCore: JSC::Yarr::YarrCodeBlock::execute + 137
https://bugs.webkit.org/show_bug.cgi?id=175903

Reviewed by Saam Barati.

Source/JavaScriptCore:

In generateCharacterClassGreedy we were incrementing the "count" register before checking
for the end of the input string.  The at-end-of-input check is the final check before
knowing that the current character matched.  In this case, the end of input check
indicates that we ran out of prechecked characters and therefore should fail the match of
the current character.  The backtracking code uses the value in the "count" register as
the number of characters that successfully matched, which shouldn't include the current
character.  Therefore we need to move the incrementing of "count" to after the
at end of input check.

Through code inspection of the expectations of other backtracking code, I determined that
the non greedy character class matching code had a similar issue.  I fixed that as well
and added a new test case.

* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):

LayoutTests:

New regression test case.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/192563@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@221111 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2017-08-23 22:24:30 +00:00
+								PASS "a𐐐𐐐b".match(/a(𐐐*?)bc|a(𐐐*?)b/ui)[0] is "a𐐐𐐐b"
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know when generating a fixed character atom that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new Layout/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the u flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS match3[0] is "a𐐐𐐐b"
 								PASS match3[1] is undefined.
 								PASS match3[2] is "a𐐐𐐐b"
 								PASS match4[0] is "a𐐸𐐸b"
 								PASS match4[1] is undefined.
 								PASS match4[2] is "𐐸𐐸"
 								PASS match5[0] is "a𐐒𐐒b𐐒𐐒"
 								PASS match5[1] is undefined.
 								PASS match5[2] is "𐐒𐐒"
 								PASS match6[0] is "a𐐒𐐒b𐐺𐐒"
 								PASS match6[1] is undefined.
 								PASS match6[2] is "𐐒𐐒"
-												[ES6] Regular Expression canonicalization tables for Unicode need to be updated to use Unicode CaseFolding.txt
https://bugs.webkit.org/show_bug.cgi?id=155114

Reviewed by Darin Adler.

Source/JavaScriptCore:

Extracted out the Unicode canonicalization table creation from
YarrCanonicalizeUnicode.js into a new Python script, generateYarrCanonicalizeUnicode.
That script generates the Unicode tables as the file YarrCanonicalizeUnicode.cpp in
DerivedSources/JavaScriptCore.

Updated the processing of ignore case to make the ASCII short cuts dependent on whether
or not we are a Unicode pattern.

Renamed yarr/YarrCanonicalizeUnicode.{cpp,js} back to their prior names,
YarrCanonicalizeUCS2.{cpp,js}.
Renamed yarr/YarrCanonicalizeUnicode.h to YarrCanonicalize.h as it declares both the
legacy UCS2 and Unicode tables.

* CMakeLists.txt:
* DerivedSources.make:
* JavaScriptCore.xcodeproj/project.pbxproj:
* generateYarrCanonicalizeUnicode: Added.
* ucd: Added.
* ucd/CaseFolding.txt: Added.  The current version, 8.0, of the Unicode CaseFolding table.
* yarr/YarrCanonicalizeUCS2.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUnicode.cpp.
* yarr/YarrCanonicalize.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUnicode.h.
* yarr/YarrCanonicalizeUCS2.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUnicode.js.
(printHeader):
* yarr/YarrCanonicalizeUnicode.cpp: Removed.
* yarr/YarrCanonicalizeUnicode.h: Removed.
* yarr/YarrCanonicalizeUnicode.js: Removed.
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::tryConsumeBackReference):
* yarr/YarrJIT.cpp:
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::putChar):

LayoutTests:

Updated test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/173271@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197781 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-08 18:35:58 +00:00
+								PASS /ſtop/ui.test("stop") is true
 								PASS /stop/ui.test("ſtop") is true
 								PASS /Kelvin/ui.test("kelvin") is true
 								PASS /KELVIN/ui.test("Kelvin") is true
-												[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns).  In the spec, the
grammar is annotated with [U] annotations.  Productions that are prefixed with
[+U] are only available with the Unicode flags while productions prefixed with
[~U] are only available without the Unicode flag.

Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
parsing errors at syntax checking time.  Restricted what escapes are available for
non Unicode patterns.  Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass only escape in Unicode patterns.

Updated the tests for these changes.

Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
change set r197426 was landed.

* parser/ASTBuilder.h:
(JSC::ASTBuilder::createRegExp):
* parser/Parser.cpp:
(JSC::Parser<LexerType>::parsePrimaryExpression):
* parser/SyntaxChecker.h:
(JSC::SyntaxChecker::createRegExp):
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):
* yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
(shouldThrowInvalidEscape):


[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

* js/Object-getOwnPropertyNames-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:


Canonical link: https://commits.webkit.org/173079@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197534 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-04 01:24:28 +00:00
+								PASS /\u{1}/.test("u") is true
 								PASS /\u{4}/.test("u") is false
 								PASS /\u{4}/.test("uuuu") is true
 								PASS "800-555-1212".match(/[0-9\-]*/u)[0].length is 12
-												Implement Unicode RegExp support in the YARR JIT
https://bugs.webkit.org/show_bug.cgi?id=174646

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

This support is only implemented for 64 bit platforms.  It wouldn't be too hard to add support
for 32 bit platforms with a reasonable number of spare registers.  This code slightly refactors
register usage to reduce the number of callee save registers used for non-Unicode expressions.
For Unicode expressions, there are several more registers used to store constant values for
processing surrogate pairs as well as discerning whether a character belongs to the Basic
Multilingual Plane (BMP) or one of the Supplemental Planes.

This implements JIT support for Unicode expressions very similar to how the interpreter works.
Just like in the interpreter, backtracking code uses more space on the stack to save positions.
Moved the BackTrackInfo* structs to YarrPattern as separate functions.  Added xxxIndex()
functions to each of these to simplify how the JIT code reads and writes the structure fields.

Given that reading surrogate pairs and transforming them into a single code point takes a
little processing, the code that implements reading a Unicode character is implemented as a
leaf function added to the end of the JIT'ed code.  The calling convention for
"tryReadUnicodeCharacterHelper()" is non-standard given that the rest of the code assumes
that argument values stay in argument registers for most of the generated code.
That helper takes the starting character address in one register, regUnicodeInputAndTrail,
and uses another dedicated temporary register, regUnicodeTemp.  The result is typically
returned in regT0.  If another return register is requested, we'll create an inline copy of
that function.

Added a new flag to CharacterClass to signify if a class has non-BMP characters.  This flag
is used in optimizeAlternative() where we swap the order of a fixed character class term with
a fixed character term that immediately follows it.  Since the non-BMP character class may
increment "index" when matching, that must be done first before trying to match a fixed
character term later in the string.

Given the usefulness of the LEA instruction on X86 to create a single pointer value from a
base with index and offset, which the YARR JIT uses heavily, I added a new macroAssembler
function, getEffectiveAddress64(), with an ARM64 implementation.  It just calls x86Lea64()
on X86-64.  Also added an ImplicitAddress version of load16Unaligned().

(JSC::MacroAssemblerARM64::load16Unaligned):
(JSC::MacroAssemblerARM64::getEffectiveAddress64):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::load16Unaligned):
(JSC::MacroAssemblerX86Common::load16):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::getEffectiveAddress64):
* create_regex_tables:
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
* yarr/YarrInterpreter.cpp:
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
(JSC::Yarr::YarrGenerator::tryReadUnicodeChar):
(JSC::Yarr::YarrGenerator::readCharacter):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::matchAssertionWordchar):
(JSC::Yarr::YarrGenerator::generateAssertionWordBoundary):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::generatePatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassOnce):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::generateTryReadUnicodeCharacterHelper):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
* yarr/YarrJIT.h:
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::reset):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::CharacterClassConstructor::hasNonBMPCharacters):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::BackTrackInfoPatternCharacter::beginIndex):
(JSC::Yarr::BackTrackInfoPatternCharacter::matchAmountIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::beginIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::matchAmountIndex):
(JSC::Yarr::BackTrackInfoBackReference::beginIndex):
(JSC::Yarr::BackTrackInfoBackReference::matchAmountIndex):
(JSC::Yarr::BackTrackInfoAlternative::offsetIndex):
(JSC::Yarr::BackTrackInfoParentheticalAssertion::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesOnce::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesTerminal::beginIndex):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/192507@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@221052 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2017-08-22 22:43:08 +00:00
+								PASS "🂡🃑🂸🃉🃚".match(re7)[0] is "🂡🃑"
 								PASS "🂡🃑🂱🃉🃚".match(re7)[0] is "🂡🃑🂱"
 								PASS "🂡🃑🂱🃁🃚".match(re7)[0] is "🂡🃑🂱🃁"
 								PASS "🂣🃑🂱🃁🃚".match(re7)[0] is "🃑🂱🃁"
 								PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]*a|[𐌐𐌑]*./iu)[0] is "𐌑𐌐𐌑"
 								PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]*?a|[𐌐𐌑]*?./iu)[0] is "𐌑"
 								PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+a|[𐌐𐌑]+./iu)[0] is "𐌑𐌐𐌑"
 								PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+?a|[𐌐𐌑]+?./iu)[0] is "𐌑𐌐"
-												REGRESSION (r243642): com.apple.JavaScriptCore crash in JSC::RegExpObject::execInline
https://bugs.webkit.org/show_bug.cgi?id=196477

Reviewed by Keith Miller.

Source/JavaScriptCore:

The problem here is that when we advance the index by 2 for a character class that only
has non-BMP characters, we might go past the end of the string.  This can happen for
greedy counted character classes that are part of an alternative where there is one
character to match after the greedy non-BMP character class.

The "do we have string left to match" check at the top of the JIT loop for the counted
character class checks to see if index is not equal to the string length.  For non-BMP
character classes, we need to check to see if there are at least 2 characters left.
Therefore we now temporarily add 1 to the current index before comparing.  This checks
to see if there are at least 2 characters left to match, instead of 1.

* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):

LayoutTests:

Updated the test with a couple more test cases to test a few variants of this bug.
Also added a couple of non-greedy counted non-BMP character class tests that don't have
the bug just to be sure.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/210835@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@243839 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2019-04-03 23:51:12 +00:00
+								PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+?a$|[𐌐𐌑]+?.$/iu)[0] is "𐌑𐌐𐌑"
 								PASS "𐌑𐌐𐌑".match(/[𐌁𐌑x]+a|[𐌐𐌑x]+./iu)[0] is "𐌑𐌐𐌑"
 								PASS "𐌑𐌐𐌑".match(/[𐌁𐌑x]+?a|[𐌐𐌑x]+?./iu)[0] is "𐌑𐌐"
-												Implement Unicode RegExp support in the YARR JIT
https://bugs.webkit.org/show_bug.cgi?id=174646

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

This support is only implemented for 64 bit platforms.  It wouldn't be too hard to add support
for 32 bit platforms with a reasonable number of spare registers.  This code slightly refactors
register usage to reduce the number of callee save registers used for non-Unicode expressions.
For Unicode expressions, there are several more registers used to store constant values for
processing surrogate pairs as well as discerning whether a character belongs to the Basic
Multilingual Plane (BMP) or one of the Supplemental Planes.

This implements JIT support for Unicode expressions very similar to how the interpreter works.
Just like in the interpreter, backtracking code uses more space on the stack to save positions.
Moved the BackTrackInfo* structs to YarrPattern as separate functions.  Added xxxIndex()
functions to each of these to simplify how the JIT code reads and writes the structure fields.

Given that reading surrogate pairs and transforming them into a single code point takes a
little processing, the code that implements reading a Unicode character is implemented as a
leaf function added to the end of the JIT'ed code.  The calling convention for
"tryReadUnicodeCharacterHelper()" is non-standard given that the rest of the code assumes
that argument values stay in argument registers for most of the generated code.
That helper takes the starting character address in one register, regUnicodeInputAndTrail,
and uses another dedicated temporary register, regUnicodeTemp.  The result is typically
returned in regT0.  If another return register is requested, we'll create an inline copy of
that function.

Added a new flag to CharacterClass to signify if a class has non-BMP characters.  This flag
is used in optimizeAlternative() where we swap the order of a fixed character class term with
a fixed character term that immediately follows it.  Since the non-BMP character class may
increment "index" when matching, that must be done first before trying to match a fixed
character term later in the string.

Given the usefulness of the LEA instruction on X86 to create a single pointer value from a
base with index and offset, which the YARR JIT uses heavily, I added a new macroAssembler
function, getEffectiveAddress64(), with an ARM64 implementation.  It just calls x86Lea64()
on X86-64.  Also added an ImplicitAddress version of load16Unaligned().

(JSC::MacroAssemblerARM64::load16Unaligned):
(JSC::MacroAssemblerARM64::getEffectiveAddress64):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::load16Unaligned):
(JSC::MacroAssemblerX86Common::load16):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::getEffectiveAddress64):
* create_regex_tables:
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
* yarr/YarrInterpreter.cpp:
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
(JSC::Yarr::YarrGenerator::tryReadUnicodeChar):
(JSC::Yarr::YarrGenerator::readCharacter):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::matchAssertionWordchar):
(JSC::Yarr::YarrGenerator::generateAssertionWordBoundary):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::generatePatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassOnce):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::generateTryReadUnicodeCharacterHelper):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
* yarr/YarrJIT.h:
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::reset):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::CharacterClassConstructor::hasNonBMPCharacters):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::BackTrackInfoPatternCharacter::beginIndex):
(JSC::Yarr::BackTrackInfoPatternCharacter::matchAmountIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::beginIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::matchAmountIndex):
(JSC::Yarr::BackTrackInfoBackReference::beginIndex):
(JSC::Yarr::BackTrackInfoBackReference::matchAmountIndex):
(JSC::Yarr::BackTrackInfoAlternative::offsetIndex):
(JSC::Yarr::BackTrackInfoParentheticalAssertion::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesOnce::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesTerminal::beginIndex):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/192507@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@221052 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2017-08-22 22:43:08 +00:00
+								PASS "C83|НАЧАТЬ".match(re8)[0] is "C83|НАЧАТЬ"
 								PASS "This.Is.16.Chars|НАЧАТЬ".match(re8)[0] is "This.Is.16.Chars|НАЧАТЬ"
 								PASS "Testing\nሴ 1 2 3".match(/^[က-𐃿] 1 2 3/um)[0] is "ሴ 1 2 3"
 								PASS "Testing\n𐃰 1 2 3".match(/^[က-𐃿] 1 2 3/um)[0] is "𐃰 1 2 3"
 								PASS "g\nሴ 1 2 3".match(/g\n^[က-𐃿] 1 2 3/um)[0] is "g\nሴ 1 2 3"
 								PASS "g\n𐃰 1 2 3".match(/g\n^[က-𐃿] 1 2 3/um)[0] is "g\n𐃰 1 2 3"
 								PASS "Testing ሴ\n1 2 3".match(/Testing [က-𐃿]$/um)[0] is "Testing ሴ"
 								PASS "Testing 𐃰\n1 2 3".match(/Testing [က-𐃿]$/um)[0] is "Testing 𐃰"
 								PASS "Testing ሴ\n1 2 3".match(/g [က-𐃿]$\n1/um)[0] is "g ሴ\n1"
 								PASS "Testing 𐃰\n1 2 3".match(/g [က-𐃿]$\n1/um)[0] is "g 𐃰\n1"
-												[ES6] Make Unicode RegExp pattern parsing conform to the spec
https://bugs.webkit.org/show_bug.cgi?id=154988

Reviewed by Benjamin Poulain.

Source/JavaScriptCore:

Updated RegExp pattern processing with 'u' (Unicode) flag to conform to the
spec (https://tc39.github.io/ecma262/2016/#sec-patterns).  In the spec, the
grammar is annotated with [U] annotations.  Productions that are prefixed with
[+U] are only available with the Unicode flags while productions prefixed with
[~U] are only available without the Unicode flag.

Added flags argument to Yarr::checkSyntax() so we can catch Unicode flag related
parsing errors at syntax checking time.  Restricted what escapes are available for
non Unicode patterns.  Most of this is defined in the IdentityEscape rule in the
pattern grammar.

Added \- as a CharacterClass only escape in Unicode patterns.

Updated the tests for these changes.

Made changes suggested in https://bugs.webkit.org/show_bug.cgi?id=154842#c22 after
change set r197426 was landed.

* parser/ASTBuilder.h:
(JSC::ASTBuilder::createRegExp):
* parser/Parser.cpp:
(JSC::Parser<LexerType>::parsePrimaryExpression):
* parser/SyntaxChecker.h:
(JSC::SyntaxChecker::createRegExp):
* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::uncheckInput):
(JSC::Yarr::Interpreter::InputStream::atStart):
(JSC::Yarr::Interpreter::InputStream::atEnd):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::disjunction):
(JSC::Yarr::checkSyntax):
* yarr/YarrSyntaxChecker.h:

LayoutTests:

Added test cases.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
(shouldThrowInvalidEscape):


[ES6] Add support for Symbol.toPrimitive
https://bugs.webkit.org/show_bug.cgi?id=154877

Reviewed by Saam Barati.

Update test for Symbol.toPrimitive.

* js/Object-getOwnPropertyNames-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:


Canonical link: https://commits.webkit.org/173079@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197534 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-04 01:24:28 +00:00
+								PASS "this is ba test".match(/is b\cha test/u)[0].length is 11
 								PASS new RegExp("\\/", "u").source is "\\/"
-												Add support in named capture group identifiers for direct surrogate pairs
https://bugs.webkit.org/show_bug.cgi?id=178174

Reviewed by Darin Adler and Michael Saboff.

JSTests:

* test262/expectations.yaml: Mark 2 test cases as passing.

Source/JavaScriptCore:

This change:

a) Adds support for unescaped astral symbols in RegExp identifier names [1],
   aligning JSC with V8.

b) Rewords InvalidUnicodeEscape error code to be used for \uXXXX escapes in
   Unicode patterns and named groups/references instead of InvalidIdentityEscape,
   matching error messages in V8 and SpiderMonkey.

c) Adds hasError() checks after tryConsumeGroupName() so errors generated in
   tryConsumeIdentifierCharacter() would not get overridden.

d) Removes code duplication by using tryConsumeUnicodeEscape() for parsing \u
   in parseEscape(); cleans up parsing \u{} escapes a bit, preferring ASSERTs
   over hasError() checks.

[1]: https://tc39.es/ecma262/#prod-RegExpIdentifierName

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parseParenthesesBegin):
(JSC::Yarr::Parser::tryConsumeUnicodeEscape):
(JSC::Yarr::Parser::tryConsumeIdentifierCharacter):

LayoutTests:

Adjusted tests for error messages changes and added coverage for messages
of syntax errors due to invalid \u escapes inside named groups/references.

* js/regexp-named-capture-groups-expected.txt:
* js/regexp-unicode-expected.txt:
* js/regress-158080-expected.txt:
* js/script-tests/regexp-named-capture-groups.js:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/222707@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@259262 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-03-31 01:27:10 +00:00
+								PASS r = new RegExp("\\u{110000}", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape.
-												Implement Unicode RegExp support in the YARR JIT
https://bugs.webkit.org/show_bug.cgi?id=174646

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

This support is only implemented for 64 bit platforms.  It wouldn't be too hard to add support
for 32 bit platforms with a reasonable number of spare registers.  This code slightly refactors
register usage to reduce the number of callee save registers used for non-Unicode expressions.
For Unicode expressions, there are several more registers used to store constant values for
processing surrogate pairs as well as discerning whether a character belongs to the Basic
Multilingual Plane (BMP) or one of the Supplemental Planes.

This implements JIT support for Unicode expressions very similar to how the interpreter works.
Just like in the interpreter, backtracking code uses more space on the stack to save positions.
Moved the BackTrackInfo* structs to YarrPattern as separate functions.  Added xxxIndex()
functions to each of these to simplify how the JIT code reads and writes the structure fields.

Given that reading surrogate pairs and transforming them into a single code point takes a
little processing, the code that implements reading a Unicode character is implemented as a
leaf function added to the end of the JIT'ed code.  The calling convention for
"tryReadUnicodeCharacterHelper()" is non-standard given that the rest of the code assumes
that argument values stay in argument registers for most of the generated code.
That helper takes the starting character address in one register, regUnicodeInputAndTrail,
and uses another dedicated temporary register, regUnicodeTemp.  The result is typically
returned in regT0.  If another return register is requested, we'll create an inline copy of
that function.

Added a new flag to CharacterClass to signify if a class has non-BMP characters.  This flag
is used in optimizeAlternative() where we swap the order of a fixed character class term with
a fixed character term that immediately follows it.  Since the non-BMP character class may
increment "index" when matching, that must be done first before trying to match a fixed
character term later in the string.

Given the usefulness of the LEA instruction on X86 to create a single pointer value from a
base with index and offset, which the YARR JIT uses heavily, I added a new macroAssembler
function, getEffectiveAddress64(), with an ARM64 implementation.  It just calls x86Lea64()
on X86-64.  Also added an ImplicitAddress version of load16Unaligned().

(JSC::MacroAssemblerARM64::load16Unaligned):
(JSC::MacroAssemblerARM64::getEffectiveAddress64):
* assembler/MacroAssemblerX86Common.h:
(JSC::MacroAssemblerX86Common::load16Unaligned):
(JSC::MacroAssemblerX86Common::load16):
* assembler/MacroAssemblerX86_64.h:
(JSC::MacroAssemblerX86_64::getEffectiveAddress64):
* create_regex_tables:
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
* yarr/YarrInterpreter.cpp:
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::tryReadUnicodeCharImpl):
(JSC::Yarr::YarrGenerator::tryReadUnicodeChar):
(JSC::Yarr::YarrGenerator::readCharacter):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::matchAssertionWordchar):
(JSC::Yarr::YarrGenerator::generateAssertionWordBoundary):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::generatePatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassOnce):
(JSC::Yarr::YarrGenerator::generateCharacterClassFixed):
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::generateCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::generateTryReadUnicodeCharacterHelper):
(JSC::Yarr::YarrGenerator::generateEnter):
(JSC::Yarr::YarrGenerator::generateReturn):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
* yarr/YarrJIT.h:
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::reset):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::CharacterClassConstructor::hasNonBMPCharacters):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::BackTrackInfoPatternCharacter::beginIndex):
(JSC::Yarr::BackTrackInfoPatternCharacter::matchAmountIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::beginIndex):
(JSC::Yarr::BackTrackInfoCharacterClass::matchAmountIndex):
(JSC::Yarr::BackTrackInfoBackReference::beginIndex):
(JSC::Yarr::BackTrackInfoBackReference::matchAmountIndex):
(JSC::Yarr::BackTrackInfoAlternative::offsetIndex):
(JSC::Yarr::BackTrackInfoParentheticalAssertion::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesOnce::beginIndex):
(JSC::Yarr::BackTrackInfoParenthesesTerminal::beginIndex):

LayoutTests:

Updated tests.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/192507@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@221052 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2017-08-22 22:43:08 +00:00
+								PASS r = new RegExp("𐐅{2147483648}", "u") threw exception SyntaxError: Invalid regular expression: pattern exceeds string length limits.
-												Incomplete braced quantifiers should be banned in Unicode patterns only
https://bugs.webkit.org/show_bug.cgi?id=206776

Reviewed by Darin Adler.

JSTests:

Although the change does not affect Unicode property escapes, a few
test/built-ins/RegExp/property-escapes/non-existent-property-value*.js files
are now passing because they had \\p or \\P instead of CharacterClassEscape.

* test262/expectations.yaml: Mark 18 test cases as passing.

Source/JavaScriptCore:

This change adds SyntaxError for Unicode patterns, aligning JSC with
V8 and SpiderMonkey, and also capitalizes "Unicode" in error messages.

Grammar: https://tc39.es/ecma262/#prod-annexB-Term
(/u flag precludes the use of ExtendedAtom and thus InvalidBracedQuantifier)

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseTokens):

LayoutTests:

An error message test is added for this change and for webkit.org/b/206768.
Other tests are adjusted for capitalized "Unicode" in error messages.

* js/regexp-named-capture-groups-expected.txt:
* js/regexp-unicode-expected.txt:
* js/regress-158080-expected.txt:
* js/script-tests/regexp-named-capture-groups.js:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/220026@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@255452 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-01-30 21:27:11 +00:00
+								PASS /{/u threw exception SyntaxError: Invalid regular expression: incomplete {} quantifier for Unicode pattern.
 								PASS /[a-\d]/u threw exception SyntaxError: Invalid regular expression: invalid range in character class for Unicode pattern.
-												Unmatched ] or } brackets should be syntax errors in Unicode patterns only
https://bugs.webkit.org/show_bug.cgi?id=207023

Reviewed by Darin Adler.

JSTests:

* test262/expectations.yaml: Mark 2 test cases as passing.

Source/JavaScriptCore:

This change adds SyntaxError for Unicode patterns, aligning JSC with
V8 and SpiderMonkey.

Grammar: https://tc39.es/ecma262/#prod-annexB-Term
(/u flag precludes the use of ExtendedAtom and thus ExtendedPatternCharacter)

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseTokens):

LayoutTests:

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/220066@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@255505 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-01-31 17:59:26 +00:00
+								PASS /]/u threw exception SyntaxError: Invalid regular expression: unmatched ] or } bracket for Unicode pattern.
-												Octal escapes should be max 3 digits and syntax errors in Unicode patterns
https://bugs.webkit.org/show_bug.cgi?id=167067

Reviewed by Ross Kirsling.

JSTests:

* test262/expectations.yaml: Mark 4 test cases as passing.

Source/JavaScriptCore:

This patch:

a) Adds SyntaxError for octal escapes in Unicode patterns, while preserving /\0/u
being parsed as null character escape. Grammar: https://tc39.es/ecma262/#prod-CharacterEscape

b) Limits consumeOctal() to 3 digits only, preventing it from consuming leading zeros.
Grammar: https://tc39.es/ecma262/#prod-annexB-LegacyOctalEscapeSequence

Both changes align JSC with V8 and SpiderMonkey.

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumeOctal):

LayoutTests:

* fast/regex/script-tests/pcre-test-1.js:
* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/222937@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@259546 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-04-05 08:12:54 +00:00
+								PASS /\5/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\01/u threw exception SyntaxError: Invalid regular expression: invalid octal escape for Unicode pattern.
 								PASS /[\23]/u threw exception SyntaxError: Invalid regular expression: invalid octal escape for Unicode pattern.
-												Non-alphabetical \c escapes should be syntax errors in Unicode patterns only
https://bugs.webkit.org/show_bug.cgi?id=207091

Reviewed by Darin Adler.

JSTests:

* test262/expectations.yaml: Mark 4 test cases as passing.

Source/JavaScriptCore:

This change adds SyntaxError for non-alphabetical and identity \c escapes
in Unicode patterns, aligning JSC with V8 and SpiderMonkey.

Grammar: https://tc39.es/ecma262/#prod-annexB-ClassEscape
(/u flag precludes the use of ClassControlLetter)

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseEscape):

LayoutTests:

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/220100@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@255544 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-02-02 00:20:04 +00:00
+								PASS /\c9/u threw exception SyntaxError: Invalid regular expression: invalid \c escape for Unicode pattern.
-												Incomplete braced quantifiers should be banned in Unicode patterns only
https://bugs.webkit.org/show_bug.cgi?id=206776

Reviewed by Darin Adler.

JSTests:

Although the change does not affect Unicode property escapes, a few
test/built-ins/RegExp/property-escapes/non-existent-property-value*.js files
are now passing because they had \\p or \\P instead of CharacterClassEscape.

* test262/expectations.yaml: Mark 18 test cases as passing.

Source/JavaScriptCore:

This change adds SyntaxError for Unicode patterns, aligning JSC with
V8 and SpiderMonkey, and also capitalizes "Unicode" in error messages.

Grammar: https://tc39.es/ecma262/#prod-annexB-Term
(/u flag precludes the use of ExtendedAtom and thus InvalidBracedQuantifier)

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseTokens):

LayoutTests:

An error message test is added for this change and for webkit.org/b/206768.
Other tests are adjusted for capitalized "Unicode" in error messages.

* js/regexp-named-capture-groups-expected.txt:
* js/regexp-unicode-expected.txt:
* js/regress-158080-expected.txt:
* js/script-tests/regexp-named-capture-groups.js:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/220026@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@255452 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-01-30 21:27:11 +00:00
+								PASS r = new RegExp("\\-", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
 								PASS r = new RegExp("\\a", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
 								PASS r = new RegExp("[\\a]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
 								PASS r = new RegExp("[\\B]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
 								PASS r = new RegExp("\\x", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
 								PASS r = new RegExp("[\\x]", "u") threw exception SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern.
-												Add support in named capture group identifiers for direct surrogate pairs
https://bugs.webkit.org/show_bug.cgi?id=178174

Reviewed by Darin Adler and Michael Saboff.

JSTests:

* test262/expectations.yaml: Mark 2 test cases as passing.

Source/JavaScriptCore:

This change:

a) Adds support for unescaped astral symbols in RegExp identifier names [1],
   aligning JSC with V8.

b) Rewords InvalidUnicodeEscape error code to be used for \uXXXX escapes in
   Unicode patterns and named groups/references instead of InvalidIdentityEscape,
   matching error messages in V8 and SpiderMonkey.

c) Adds hasError() checks after tryConsumeGroupName() so errors generated in
   tryConsumeIdentifierCharacter() would not get overridden.

d) Removes code duplication by using tryConsumeUnicodeEscape() for parsing \u
   in parseEscape(); cleans up parsing \u{} escapes a bit, preferring ASSERTs
   over hasError() checks.

[1]: https://tc39.es/ecma262/#prod-RegExpIdentifierName

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::parseParenthesesBegin):
(JSC::Yarr::Parser::tryConsumeUnicodeEscape):
(JSC::Yarr::Parser::tryConsumeIdentifierCharacter):

LayoutTests:

Adjusted tests for error messages changes and added coverage for messages
of syntax errors due to invalid \u escapes inside named groups/references.

* js/regexp-named-capture-groups-expected.txt:
* js/regexp-unicode-expected.txt:
* js/regress-158080-expected.txt:
* js/script-tests/regexp-named-capture-groups.js:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/222707@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@259262 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-03-31 01:27:10 +00:00
+								PASS r = new RegExp("\\u", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode \u escape.
 								PASS r = new RegExp("[\\u]", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode \u escape.
 								PASS r = new RegExp("\\u{", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape.
 								PASS r = new RegExp("\\u{\udead", "u") threw exception SyntaxError: Invalid regular expression: invalid Unicode code point \u{} escape.
-												Incomplete braced quantifiers should be banned in Unicode patterns only
https://bugs.webkit.org/show_bug.cgi?id=206776

Reviewed by Darin Adler.

JSTests:

Although the change does not affect Unicode property escapes, a few
test/built-ins/RegExp/property-escapes/non-existent-property-value*.js files
are now passing because they had \\p or \\P instead of CharacterClassEscape.

* test262/expectations.yaml: Mark 18 test cases as passing.

Source/JavaScriptCore:

This change adds SyntaxError for Unicode patterns, aligning JSC with
V8 and SpiderMonkey, and also capitalizes "Unicode" in error messages.

Grammar: https://tc39.es/ecma262/#prod-annexB-Term
(/u flag precludes the use of ExtendedAtom and thus InvalidBracedQuantifier)

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseTokens):

LayoutTests:

An error message test is added for this change and for webkit.org/b/206768.
Other tests are adjusted for capitalized "Unicode" in error messages.

* js/regexp-named-capture-groups-expected.txt:
* js/regexp-unicode-expected.txt:
* js/regress-158080-expected.txt:
* js/script-tests/regexp-named-capture-groups.js:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/220026@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@255452 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-01-30 21:27:11 +00:00
+								PASS /\1/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\2/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\3/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\4/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\5/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\6/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\7/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\8/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
 								PASS /\9/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
-												test262: test262/test/language/literals/regexp/u-dec-esc.js
https://bugs.webkit.org/show_bug.cgi?id=170687

Patch by Joseph Pecoraro <pecoraro@apple.com> on 2017-04-12
Reviewed by Michael Saboff.

JSTests:

* test262.yaml:

Source/JavaScriptCore:

* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseEscape):
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPattern::errorMessage):
(JSC::Yarr::YarrPattern::compile):
* yarr/YarrPattern.h:
In Unicode patterns, invalid backreferences are an error.

LayoutTests:

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
Extend test to test invalid backreferences.

Canonical link: https://commits.webkit.org/187729@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@215311 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2017-04-13 02:51:18 +00:00
+								PASS /(.)\1/u did not throw exception.
 								PASS /(.)(.)\2/u did not throw exception.
-												Incomplete braced quantifiers should be banned in Unicode patterns only
https://bugs.webkit.org/show_bug.cgi?id=206776

Reviewed by Darin Adler.

JSTests:

Although the change does not affect Unicode property escapes, a few
test/built-ins/RegExp/property-escapes/non-existent-property-value*.js files
are now passing because they had \\p or \\P instead of CharacterClassEscape.

* test262/expectations.yaml: Mark 18 test cases as passing.

Source/JavaScriptCore:

This change adds SyntaxError for Unicode patterns, aligning JSC with
V8 and SpiderMonkey, and also capitalizes "Unicode" in error messages.

Grammar: https://tc39.es/ecma262/#prod-annexB-Term
(/u flag precludes the use of ExtendedAtom and thus InvalidBracedQuantifier)

* yarr/YarrErrorCode.cpp:
(JSC::Yarr::errorMessage):
(JSC::Yarr::errorToThrow):
* yarr/YarrErrorCode.h:
* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseTokens):

LayoutTests:

An error message test is added for this change and for webkit.org/b/206768.
Other tests are adjusted for capitalized "Unicode" in error messages.

* js/regexp-named-capture-groups-expected.txt:
* js/regexp-unicode-expected.txt:
* js/regress-158080-expected.txt:
* js/script-tests/regexp-named-capture-groups.js:
* js/script-tests/regexp-unicode.js:


Canonical link: https://commits.webkit.org/220026@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@255452 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2020-01-30 21:27:11 +00:00
+								PASS /(.)(.)\3/u threw exception SyntaxError: Invalid regular expression: invalid backreference for Unicode pattern.
-												test262: test262/test/language/literals/regexp/u-dec-esc.js
https://bugs.webkit.org/show_bug.cgi?id=170687

Patch by Joseph Pecoraro <pecoraro@apple.com> on 2017-04-12
Reviewed by Michael Saboff.

JSTests:

* test262.yaml:

Source/JavaScriptCore:

* yarr/YarrParser.h:
(JSC::Yarr::Parser::parseEscape):
* yarr/YarrPattern.cpp:
(JSC::Yarr::YarrPattern::errorMessage):
(JSC::Yarr::YarrPattern::compile):
* yarr/YarrPattern.h:
In Unicode patterns, invalid backreferences are an error.

LayoutTests:

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:
Extend test to test invalid backreferences.

Canonical link: https://commits.webkit.org/187729@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@215311 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2017-04-13 02:51:18 +00:00
+								PASS /\1/ did not throw exception.
 								PASS /\2/ did not throw exception.
 								PASS /\3/ did not throw exception.
 								PASS /\4/ did not throw exception.
 								PASS /\5/ did not throw exception.
 								PASS /\6/ did not throw exception.
 								PASS /\7/ did not throw exception.
 								PASS /\8/ did not throw exception.
 								PASS /\9/ did not throw exception.
-												[ES6] Add support for Unicode regular expressions
https://bugs.webkit.org/show_bug.cgi?id=154842

Reviewed by Filip Pizlo.

Source/JavaScriptCore:

Added processing of Unicode regular expressions to the Yarr interpreter.

Changed parsing of regular expression patterns and PatternTerms to process characters as
UChar32 in the Yarr code.  The parser converts matched surrogate pairs into the appropriate
Unicode character when the expression is parsed.  When matching a unicode expression and
reading source characters, we convert a proper surrogate pair into a Unicode character and
reading source characters, we convert proper surrogate pair into a Unicode character and
advance the source cursor, "pos", one more position.  The exception to this is when we
know when generating a fixed character atom that we need to match a unicode character
that doesn't fit in 16 bits.  The code calls this an extendedUnicodeCharacter and has a
helper to determine this.

Added 'u' flag and 'unicode' identifier to regular expression classes.  Added an "isUnicode"
parameter to YarrPattern pattern() and internal users of that function.

Updated the generation of the canonicalization tables to include a new set of tables that
follow the ES 6.0, 21.2.2.8.2 Step 2.  Renamed the YarrCanonicalizeUCS2.* files to
YarrCanonicalizeUnicode.*.

Added a new LayoutTests/js test that tests the added functionality.  Updated other tests that
have minor es6 unicode checks and look for valid flags.

Ran the ChakraCore Unicode regular expression tests as well.

* CMakeLists.txt:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj:
* JavaScriptCore.vcxproj/JavaScriptCore.vcxproj.filters:
* JavaScriptCore.xcodeproj/project.pbxproj:

* inspector/ContentSearchUtilities.cpp:
(Inspector::ContentSearchUtilities::findMagicComment):
* yarr/RegularExpression.cpp:
(JSC::Yarr::RegularExpression::Private::compile):
Updated use of pattern().

* runtime/CommonIdentifiers.h:
* runtime/RegExp.cpp:
(JSC::regExpFlags):
(JSC::RegExpFunctionalTestCollector::outputOneTest):
(JSC::RegExp::finishCreation):
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* runtime/RegExp.h:
* runtime/RegExpKey.h:
* runtime/RegExpPrototype.cpp:
(JSC::regExpProtoFuncCompile):
(JSC::flagsString):
(JSC::regExpProtoGetterMultiline):
(JSC::regExpProtoGetterUnicode):
(JSC::regExpProtoGetterFlags):
Updated for new 'u' (unicode) flag.  Add check to use the interpreter for unicode regular expressions.

* tests/es6.yaml:
* tests/stress/static-getter-in-names.js:
Updated tests for new flag and for passing the minimal es6 regular expression processing.

* yarr/Yarr.h: Updated the size of information now kept for backtracking.

* yarr/YarrCanonicalizeUCS2.cpp: Removed.
* yarr/YarrCanonicalizeUCS2.h: Removed.
* yarr/YarrCanonicalizeUCS2.js: Removed.
* yarr/YarrCanonicalizeUnicode.cpp: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.cpp.
* yarr/YarrCanonicalizeUnicode.h: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.h.
(JSC::Yarr::canonicalCharacterSetInfo):
(JSC::Yarr::canonicalRangeInfoFor):
(JSC::Yarr::getCanonicalPair):
(JSC::Yarr::isCanonicallyUnique):
(JSC::Yarr::areCanonicallyEquivalent):
(JSC::Yarr::rangeInfoFor): Deleted.
* yarr/YarrCanonicalizeUnicode.js: Copied from Source/JavaScriptCore/yarr/YarrCanonicalizeUCS2.js.
(printHeader):
(printFooter):
(hex):
(canonicalize):
(canonicalizeUnicode):
(createUCS2CanonicalGroups):
(createUnicodeCanonicalGroups):
(cu.in.groupedCanonically.characters.sort): Deleted.
(cu.in.groupedCanonically.else): Deleted.
Refactored to output two sets of tables, one for UCS2 and one for Unicode.  The UCS2 tables follow
the legacy canonicalization rules now specified in ES 6.0, 21.2.2.8.2 Step 3.  The new Unicode
tables follow the rules specified in ES 6.0, 21.2.2.8.2 Step 2.  Eliminated the unused Latin1 tables.

* yarr/YarrInterpreter.cpp:
(JSC::Yarr::Interpreter::InputStream::InputStream):
(JSC::Yarr::Interpreter::InputStream::readChecked):
(JSC::Yarr::Interpreter::InputStream::readSurrogatePairChecked):
(JSC::Yarr::Interpreter::InputStream::reread):
(JSC::Yarr::Interpreter::InputStream::prev):
(JSC::Yarr::Interpreter::testCharacterClass):
(JSC::Yarr::Interpreter::checkCharacter):
(JSC::Yarr::Interpreter::checkSurrogatePair):
(JSC::Yarr::Interpreter::checkCasedCharacter):
(JSC::Yarr::Interpreter::tryConsumeBackReference):
(JSC::Yarr::Interpreter::backtrackPatternCharacter):
(JSC::Yarr::Interpreter::matchCharacterClass):
(JSC::Yarr::Interpreter::backtrackCharacterClass):
(JSC::Yarr::Interpreter::matchParenthesesTerminalEnd):
(JSC::Yarr::Interpreter::matchDisjunction):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::ByteCompiler::assertionWordBoundary):
(JSC::Yarr::ByteCompiler::atomPatternCharacter):
* yarr/YarrInterpreter.h:
(JSC::Yarr::ByteTerm::ByteTerm):
(JSC::Yarr::BytecodePattern::BytecodePattern):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::optimizeAlternative):
(JSC::Yarr::YarrGenerator::matchCharacterClassRange):
(JSC::Yarr::YarrGenerator::matchCharacterClass):
(JSC::Yarr::YarrGenerator::notAtEndOfInput):
(JSC::Yarr::YarrGenerator::jumpIfCharNotEquals):
(JSC::Yarr::YarrGenerator::generatePatternCharacterOnce):
(JSC::Yarr::YarrGenerator::generatePatternCharacterFixed):
(JSC::Yarr::YarrGenerator::generatePatternCharacterGreedy):
(JSC::Yarr::YarrGenerator::backtrackPatternCharacterNonGreedy):
* yarr/YarrParser.h:
(JSC::Yarr::Parser::CharacterClassParserDelegate::atomPatternCharacter):
(JSC::Yarr::Parser::Parser):
(JSC::Yarr::Parser::parseEscape):
(JSC::Yarr::Parser::consumePossibleSurrogatePair):
(JSC::Yarr::Parser::parseCharacterClass):
(JSC::Yarr::Parser::parseTokens):
(JSC::Yarr::Parser::parse):
(JSC::Yarr::Parser::atEndOfPattern):
(JSC::Yarr::Parser::patternRemaining):
(JSC::Yarr::Parser::peek):
(JSC::Yarr::parse):
* yarr/YarrPattern.cpp:
(JSC::Yarr::CharacterClassConstructor::CharacterClassConstructor):
(JSC::Yarr::CharacterClassConstructor::append):
(JSC::Yarr::CharacterClassConstructor::putChar):
(JSC::Yarr::CharacterClassConstructor::putUnicodeIgnoreCase):
(JSC::Yarr::CharacterClassConstructor::putRange):
(JSC::Yarr::CharacterClassConstructor::charClass):
(JSC::Yarr::CharacterClassConstructor::addSorted):
(JSC::Yarr::CharacterClassConstructor::addSortedRange):
(JSC::Yarr::YarrPatternConstructor::YarrPatternConstructor):
(JSC::Yarr::YarrPatternConstructor::assertionWordBoundary):
(JSC::Yarr::YarrPatternConstructor::atomPatternCharacter):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassBegin):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassAtom):
(JSC::Yarr::YarrPatternConstructor::atomCharacterClassRange):
(JSC::Yarr::YarrPatternConstructor::setupAlternativeOffsets):
(JSC::Yarr::YarrPattern::compile):
(JSC::Yarr::YarrPattern::YarrPattern):
* yarr/YarrPattern.h:
(JSC::Yarr::CharacterRange::CharacterRange):
(JSC::Yarr::CharacterClass::CharacterClass):
(JSC::Yarr::PatternTerm::PatternTerm):
(JSC::Yarr::YarrPattern::reset):
* yarr/YarrSyntaxChecker.cpp:
(JSC::Yarr::SyntaxChecker::assertionBOL):
(JSC::Yarr::SyntaxChecker::assertionEOL):
(JSC::Yarr::SyntaxChecker::assertionWordBoundary):
(JSC::Yarr::SyntaxChecker::atomPatternCharacter):
(JSC::Yarr::SyntaxChecker::atomBuiltInCharacterClass):
(JSC::Yarr::SyntaxChecker::atomCharacterClassBegin):
(JSC::Yarr::SyntaxChecker::atomCharacterClassAtom):
(JSC::Yarr::checkSyntax):

LayoutTests:

Added a new test for the added unicode regular expression processing.

Updated several tests for the u flag changes and "unicode" property.

* js/regexp-unicode-expected.txt: Added.
* js/regexp-unicode.html: Added.
* js/script-tests/regexp-unicode.js: Added.
New test.

* js/Object-getOwnPropertyNames-expected.txt:
* js/regexp-flags-expected.txt:
* js/script-tests/Object-getOwnPropertyNames.js:
* js/script-tests/regexp-flags.js:
(RegExp.prototype.hasOwnProperty):
Updated tests.


Canonical link: https://commits.webkit.org/172980@main
git-svn-id: https://svn.webkit.org/repository/webkit/trunk@197426 268f45cc-cd09-0410-ab3c-d52691b4dbfc

											
										
										
											2016-03-02 00:39:01 +00:00
+								PASS successfullyParsed is true
 								TEST COMPLETE