469 lines
13 KiB
C++
469 lines
13 KiB
C++
/*
|
|
* Copyright 2005 Maksim Orlovich <maksim@kde.org>
|
|
* Copyright (C) 2006, 2013 Apple Inc. All rights reserved.
|
|
* Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "config.h"
|
|
#include "XPathParser.h"
|
|
|
|
#include "XPathEvaluator.h"
|
|
#include "XPathNSResolver.h"
|
|
#include "XPathPath.h"
|
|
#include "XPathStep.h"
|
|
#include <wtf/NeverDestroyed.h>
|
|
#include <wtf/RobinHoodHashMap.h>
|
|
#include <wtf/StdLibExtras.h>
|
|
#include <wtf/text/StringHash.h>
|
|
|
|
using namespace WebCore::XPath;
|
|
|
|
extern int xpathyyparse(WebCore::XPath::Parser&);
|
|
|
|
#include "XPathGrammar.h"
|
|
|
|
namespace WebCore {
|
|
namespace XPath {
|
|
|
|
struct Parser::Token {
|
|
int type;
|
|
String string;
|
|
Step::Axis axis;
|
|
NumericOp::Opcode numericOpcode;
|
|
EqTestOp::Opcode equalityTestOpcode;
|
|
|
|
Token(int type) : type(type) { }
|
|
Token(int type, const String& string) : type(type), string(string) { }
|
|
Token(int type, Step::Axis axis) : type(type), axis(axis) { }
|
|
Token(int type, NumericOp::Opcode opcode) : type(type), numericOpcode(opcode) { }
|
|
Token(int type, EqTestOp::Opcode opcode) : type(type), equalityTestOpcode(opcode) { }
|
|
};
|
|
|
|
enum XMLCat { NameStart, NameCont, NotPartOfName };
|
|
|
|
static XMLCat charCat(UChar character)
|
|
{
|
|
if (character == '_')
|
|
return NameStart;
|
|
|
|
if (character == '.' || character == '-')
|
|
return NameCont;
|
|
unsigned characterTypeMask = U_GET_GC_MASK(character);
|
|
if (characterTypeMask & (U_GC_LU_MASK | U_GC_LL_MASK | U_GC_LO_MASK | U_GC_LT_MASK | U_GC_NL_MASK))
|
|
return NameStart;
|
|
if (characterTypeMask & (U_GC_M_MASK | U_GC_LM_MASK | U_GC_ND_MASK))
|
|
return NameCont;
|
|
return NotPartOfName;
|
|
}
|
|
|
|
static MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> createAxisNamesMap()
|
|
{
|
|
struct AxisName {
|
|
ASCIILiteral name;
|
|
Step::Axis axis;
|
|
};
|
|
const AxisName axisNameList[] = {
|
|
{ "ancestor"_s, Step::AncestorAxis },
|
|
{ "ancestor-or-self"_s, Step::AncestorOrSelfAxis },
|
|
{ "attribute"_s, Step::AttributeAxis },
|
|
{ "child"_s, Step::ChildAxis },
|
|
{ "descendant"_s, Step::DescendantAxis },
|
|
{ "descendant-or-self"_s, Step::DescendantOrSelfAxis },
|
|
{ "following"_s, Step::FollowingAxis },
|
|
{ "following-sibling"_s, Step::FollowingSiblingAxis },
|
|
{ "namespace"_s, Step::NamespaceAxis },
|
|
{ "parent"_s, Step::ParentAxis },
|
|
{ "preceding"_s, Step::PrecedingAxis },
|
|
{ "preceding-sibling"_s, Step::PrecedingSiblingAxis },
|
|
{ "self"_s, Step::SelfAxis }
|
|
};
|
|
MemoryCompactLookupOnlyRobinHoodHashMap<String, Step::Axis> map;
|
|
for (auto& axisName : axisNameList)
|
|
map.add(axisName.name, axisName.axis);
|
|
return map;
|
|
}
|
|
|
|
static bool parseAxisName(const String& name, Step::Axis& type)
|
|
{
|
|
static const auto axisNames = makeNeverDestroyed(createAxisNamesMap());
|
|
auto it = axisNames.get().find(name);
|
|
if (it == axisNames.get().end())
|
|
return false;
|
|
type = it->value;
|
|
return true;
|
|
}
|
|
|
|
// Returns whether the current token can possibly be a binary operator, given
|
|
// the previous token. Necessary to disambiguate some of the operators
|
|
// (* (multiply), div, and, or, mod) in the [32] Operator rule
|
|
// (check http://www.w3.org/TR/xpath#exprlex).
|
|
bool Parser::isBinaryOperatorContext() const
|
|
{
|
|
switch (m_lastTokenType) {
|
|
case 0:
|
|
case '@': case AXISNAME: case '(': case '[': case ',':
|
|
case AND: case OR: case MULOP:
|
|
case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
|
|
case EQOP: case RELOP:
|
|
return false;
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
void Parser::skipWS()
|
|
{
|
|
while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
|
|
++m_nextPos;
|
|
}
|
|
|
|
Parser::Token Parser::makeTokenAndAdvance(int code, int advance)
|
|
{
|
|
m_nextPos += advance;
|
|
return Token(code);
|
|
}
|
|
|
|
Parser::Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
|
|
{
|
|
m_nextPos += advance;
|
|
return Token(code, val);
|
|
}
|
|
|
|
Parser::Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
|
|
{
|
|
m_nextPos += advance;
|
|
return Token(code, val);
|
|
}
|
|
|
|
// Returns next char if it's there and interesting, 0 otherwise.
|
|
char Parser::peekAheadHelper()
|
|
{
|
|
if (m_nextPos + 1 >= m_data.length())
|
|
return 0;
|
|
UChar next = m_data[m_nextPos + 1];
|
|
if (next >= 0xff)
|
|
return 0;
|
|
return next;
|
|
}
|
|
|
|
char Parser::peekCurHelper()
|
|
{
|
|
if (m_nextPos >= m_data.length())
|
|
return 0;
|
|
UChar next = m_data[m_nextPos];
|
|
if (next >= 0xff)
|
|
return 0;
|
|
return next;
|
|
}
|
|
|
|
Parser::Token Parser::lexString()
|
|
{
|
|
UChar delimiter = m_data[m_nextPos];
|
|
int startPos = m_nextPos + 1;
|
|
|
|
for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
|
|
if (m_data[m_nextPos] == delimiter) {
|
|
String value = m_data.substring(startPos, m_nextPos - startPos);
|
|
if (value.isNull())
|
|
value = emptyString();
|
|
++m_nextPos; // Consume the char.
|
|
return Token(LITERAL, value);
|
|
}
|
|
}
|
|
|
|
// Ouch, went off the end -- report error.
|
|
return Token(XPATH_ERROR);
|
|
}
|
|
|
|
Parser::Token Parser::lexNumber()
|
|
{
|
|
int startPos = m_nextPos;
|
|
bool seenDot = false;
|
|
|
|
// Go until end or a non-digits character.
|
|
for (; m_nextPos < m_data.length(); ++m_nextPos) {
|
|
UChar aChar = m_data[m_nextPos];
|
|
if (aChar >= 0xff) break;
|
|
|
|
if (!isASCIIDigit(aChar)) {
|
|
if (aChar == '.' && !seenDot)
|
|
seenDot = true;
|
|
else
|
|
break;
|
|
}
|
|
}
|
|
|
|
return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
|
|
}
|
|
|
|
bool Parser::lexNCName(String& name)
|
|
{
|
|
int startPos = m_nextPos;
|
|
if (m_nextPos >= m_data.length())
|
|
return false;
|
|
|
|
if (charCat(m_data[m_nextPos]) != NameStart)
|
|
return false;
|
|
|
|
// Keep going until we get a character that's not good for names.
|
|
for (; m_nextPos < m_data.length(); ++m_nextPos)
|
|
if (charCat(m_data[m_nextPos]) == NotPartOfName)
|
|
break;
|
|
|
|
name = m_data.substring(startPos, m_nextPos - startPos);
|
|
return true;
|
|
}
|
|
|
|
bool Parser::lexQName(String& name)
|
|
{
|
|
String n1;
|
|
if (!lexNCName(n1))
|
|
return false;
|
|
|
|
skipWS();
|
|
|
|
// If the next character is :, what we just got it the prefix, if not,
|
|
// it's the whole thing.
|
|
if (peekAheadHelper() != ':') {
|
|
name = n1;
|
|
return true;
|
|
}
|
|
|
|
String n2;
|
|
if (!lexNCName(n2))
|
|
return false;
|
|
|
|
name = n1 + ":" + n2;
|
|
return true;
|
|
}
|
|
|
|
inline Parser::Token Parser::nextTokenInternal()
|
|
{
|
|
skipWS();
|
|
|
|
if (m_nextPos >= m_data.length())
|
|
return Token(0);
|
|
|
|
char code = peekCurHelper();
|
|
switch (code) {
|
|
case '(': case ')': case '[': case ']':
|
|
case '@': case ',': case '|':
|
|
return makeTokenAndAdvance(code);
|
|
case '\'':
|
|
case '\"':
|
|
return lexString();
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
return lexNumber();
|
|
case '.': {
|
|
char next = peekAheadHelper();
|
|
if (next == '.')
|
|
return makeTokenAndAdvance(DOTDOT, 2);
|
|
if (isASCIIDigit(next))
|
|
return lexNumber();
|
|
return makeTokenAndAdvance('.');
|
|
}
|
|
case '/':
|
|
if (peekAheadHelper() == '/')
|
|
return makeTokenAndAdvance(SLASHSLASH, 2);
|
|
return makeTokenAndAdvance('/');
|
|
case '+':
|
|
return makeTokenAndAdvance(PLUS);
|
|
case '-':
|
|
return makeTokenAndAdvance(MINUS);
|
|
case '=':
|
|
return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
|
|
case '!':
|
|
if (peekAheadHelper() == '=')
|
|
return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
|
|
return Token(XPATH_ERROR);
|
|
case '<':
|
|
if (peekAheadHelper() == '=')
|
|
return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
|
|
return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
|
|
case '>':
|
|
if (peekAheadHelper() == '=')
|
|
return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
|
|
return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
|
|
case '*':
|
|
if (isBinaryOperatorContext())
|
|
return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
|
|
++m_nextPos;
|
|
return Token(NAMETEST, "*");
|
|
case '$': { // $ QName
|
|
m_nextPos++;
|
|
String name;
|
|
if (!lexQName(name))
|
|
return Token(XPATH_ERROR);
|
|
return Token(VARIABLEREFERENCE, name);
|
|
}
|
|
}
|
|
|
|
String name;
|
|
if (!lexNCName(name))
|
|
return Token(XPATH_ERROR);
|
|
|
|
skipWS();
|
|
// If we're in an operator context, check for any operator names
|
|
if (isBinaryOperatorContext()) {
|
|
if (name == "and") //### hash?
|
|
return Token(AND);
|
|
if (name == "or")
|
|
return Token(OR);
|
|
if (name == "mod")
|
|
return Token(MULOP, NumericOp::OP_Mod);
|
|
if (name == "div")
|
|
return Token(MULOP, NumericOp::OP_Div);
|
|
}
|
|
|
|
// See whether we are at a :
|
|
if (peekCurHelper() == ':') {
|
|
m_nextPos++;
|
|
// Any chance it's an axis name?
|
|
if (peekCurHelper() == ':') {
|
|
m_nextPos++;
|
|
|
|
//It might be an axis name.
|
|
Step::Axis axis;
|
|
if (parseAxisName(name, axis))
|
|
return Token(AXISNAME, axis);
|
|
// Ugh, :: is only valid in axis names -> error
|
|
return Token(XPATH_ERROR);
|
|
}
|
|
|
|
// Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
|
|
skipWS();
|
|
if (peekCurHelper() == '*') {
|
|
m_nextPos++;
|
|
return Token(NAMETEST, name + ":*");
|
|
}
|
|
|
|
// Make a full qname.
|
|
String n2;
|
|
if (!lexNCName(n2))
|
|
return Token(XPATH_ERROR);
|
|
|
|
name = name + ":" + n2;
|
|
}
|
|
|
|
skipWS();
|
|
|
|
if (peekCurHelper() == '(') {
|
|
// note: we don't swallow the '(' here!
|
|
|
|
// Either node type oor function name.
|
|
|
|
if (name == "processing-instruction")
|
|
return Token(PI);
|
|
if (name == "node")
|
|
return Token(NODE);
|
|
if (name == "text")
|
|
return Token(TEXT_);
|
|
if (name == "comment")
|
|
return Token(COMMENT);
|
|
|
|
return Token(FUNCTIONNAME, name);
|
|
}
|
|
|
|
// At this point, it must be NAMETEST.
|
|
return Token(NAMETEST, name);
|
|
}
|
|
|
|
inline Parser::Token Parser::nextToken()
|
|
{
|
|
Token token = nextTokenInternal();
|
|
m_lastTokenType = token.type;
|
|
return token;
|
|
}
|
|
|
|
Parser::Parser(const String& statement, RefPtr<XPathNSResolver>&& resolver)
|
|
: m_data(statement)
|
|
, m_resolver(WTFMove(resolver))
|
|
{
|
|
}
|
|
|
|
int Parser::lex(YYSTYPE& yylval)
|
|
{
|
|
Token token = nextToken();
|
|
|
|
switch (token.type) {
|
|
case AXISNAME:
|
|
yylval.axis = token.axis;
|
|
break;
|
|
case MULOP:
|
|
yylval.numericOpcode = token.numericOpcode;
|
|
break;
|
|
case RELOP:
|
|
case EQOP:
|
|
yylval.equalityTestOpcode = token.equalityTestOpcode;
|
|
break;
|
|
case NODETYPE:
|
|
case FUNCTIONNAME:
|
|
case LITERAL:
|
|
case VARIABLEREFERENCE:
|
|
case NUMBER:
|
|
case NAMETEST:
|
|
yylval.string = token.string.releaseImpl().leakRef();
|
|
break;
|
|
}
|
|
|
|
return token.type;
|
|
}
|
|
|
|
bool Parser::expandQualifiedName(const String& qualifiedName, String& localName, String& namespaceURI)
|
|
{
|
|
size_t colon = qualifiedName.find(':');
|
|
if (colon != notFound) {
|
|
if (!m_resolver) {
|
|
m_sawNamespaceError = true;
|
|
return false;
|
|
}
|
|
namespaceURI = m_resolver->lookupNamespaceURI(qualifiedName.left(colon));
|
|
if (namespaceURI.isNull()) {
|
|
m_sawNamespaceError = true;
|
|
return false;
|
|
}
|
|
localName = qualifiedName.substring(colon + 1);
|
|
} else
|
|
localName = qualifiedName;
|
|
return true;
|
|
}
|
|
|
|
ExceptionOr<std::unique_ptr<Expression>> Parser::parseStatement(const String& statement, RefPtr<XPathNSResolver>&& resolver)
|
|
{
|
|
Parser parser { statement, WTFMove(resolver) };
|
|
|
|
int parseError = xpathyyparse(parser);
|
|
|
|
if (parser.m_sawNamespaceError)
|
|
return Exception { NamespaceError };
|
|
|
|
if (parseError)
|
|
return Exception { SyntaxError };
|
|
|
|
return WTFMove(parser.m_result);
|
|
}
|
|
|
|
} }
|