308 lines
10 KiB
C++
308 lines
10 KiB
C++
/*
 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
 *           (C) 1999 Antti Koivisto (koivisto@kde.org)
 *           (C) 2001 Dirk Mueller (mueller@kde.org)
 *           (C) 2006 Alexey Proskuryakov (ap@webkit.org)
 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
|
|
|
|
#include "config.h"
|
|
#include "SharedStringHash.h"
|
|
|
|
#include <wtf/URL.h>
|
|
#include <wtf/text/AtomString.h>
|
|
#include <wtf/text/StringHash.h>
|
|
#include <wtf/text/StringView.h>
|
|
|
|
namespace WebCore {
|
|
|
|
template <typename CharacterType>
|
|
static inline size_t findSlashDotDotSlash(const CharacterType* characters, size_t length, size_t position)
|
|
{
|
|
if (length < 4)
|
|
return notFound;
|
|
size_t loopLimit = length - 3;
|
|
for (size_t i = position; i < loopLimit; ++i) {
|
|
if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
|
|
return i;
|
|
}
|
|
return notFound;
|
|
}
|
|
|
|
template <typename CharacterType>
|
|
static inline size_t findSlashSlash(const CharacterType* characters, size_t length, size_t position)
|
|
{
|
|
if (length < 2)
|
|
return notFound;
|
|
size_t loopLimit = length - 1;
|
|
for (size_t i = position; i < loopLimit; ++i) {
|
|
if (characters[i] == '/' && characters[i + 1] == '/')
|
|
return i;
|
|
}
|
|
return notFound;
|
|
}
|
|
|
|
template <typename CharacterType>
|
|
static inline size_t findSlashDotSlash(const CharacterType* characters, size_t length, size_t position)
|
|
{
|
|
if (length < 3)
|
|
return notFound;
|
|
size_t loopLimit = length - 2;
|
|
for (size_t i = position; i < loopLimit; ++i) {
|
|
if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
|
|
return i;
|
|
}
|
|
return notFound;
|
|
}
|
|
|
|
// Returns true when the first |length| characters contain the sequence "://" anywhere.
template <typename CharacterType>
static inline bool containsColonSlashSlash(const CharacterType* characters, unsigned length)
{
    constexpr unsigned markerLength = 3;
    if (length < markerLength)
        return false;

    // Walk a cursor over every position where a full "://" still fits.
    const CharacterType* const lastStart = characters + (length - markerLength);
    for (const CharacterType* cursor = characters; cursor <= lastStart; ++cursor) {
        if (cursor[0] == ':' && cursor[1] == '/' && cursor[2] == '/')
            return true;
    }
    return false;
}
|
|
|
|
template <typename CharacterType>
|
|
static inline void squeezeOutNullCharacters(Vector<CharacterType, 512>& string)
|
|
{
|
|
size_t size = string.size();
|
|
size_t i = 0;
|
|
for (i = 0; i < size; ++i) {
|
|
if (!string[i])
|
|
break;
|
|
}
|
|
if (i == size)
|
|
return;
|
|
size_t j = i;
|
|
for (++i; i < size; ++i) {
|
|
if (CharacterType character = string[i])
|
|
string[j++] = character;
|
|
}
|
|
ASSERT(j < size);
|
|
string.shrink(j);
|
|
}
|
|
|
|
template <typename CharacterType>
|
|
static void cleanSlashDotDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
|
|
{
|
|
size_t slash = firstSlash;
|
|
do {
|
|
size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound;
|
|
// Don't remove the host, i.e. http://foo.org/../foo.html
|
|
if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) {
|
|
path[slash] = 0;
|
|
path[slash + 1] = 0;
|
|
path[slash + 2] = 0;
|
|
} else {
|
|
for (size_t i = previousSlash; i < slash + 3; ++i)
|
|
path[i] = 0;
|
|
}
|
|
slash += 3;
|
|
} while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound);
|
|
squeezeOutNullCharacters(path);
|
|
}
|
|
|
|
template <typename CharacterType>
|
|
static void mergeDoubleSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
|
|
{
|
|
size_t refPos = find(path.data(), path.size(), '#');
|
|
if (!refPos || refPos == notFound)
|
|
refPos = path.size();
|
|
|
|
size_t slash = firstSlash;
|
|
while (slash < refPos) {
|
|
if (!slash || path[slash - 1] != ':')
|
|
path[slash++] = 0;
|
|
else
|
|
slash += 2;
|
|
if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound)
|
|
break;
|
|
}
|
|
squeezeOutNullCharacters(path);
|
|
}
|
|
|
|
template <typename CharacterType>
|
|
static void cleanSlashDotSlashes(Vector<CharacterType, 512>& path, size_t firstSlash)
|
|
{
|
|
size_t slash = firstSlash;
|
|
do {
|
|
path[slash] = 0;
|
|
path[slash + 1] = 0;
|
|
slash += 2;
|
|
} while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound);
|
|
squeezeOutNullCharacters(path);
|
|
}
|
|
|
|
template <typename CharacterType>
|
|
static inline void cleanPath(Vector<CharacterType, 512>& path)
|
|
{
|
|
// FIXME: Should not do this in the query or anchor part of the URL.
|
|
size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0);
|
|
if (firstSlash != notFound)
|
|
cleanSlashDotDotSlashes(path, firstSlash);
|
|
|
|
// FIXME: Should not do this in the query part.
|
|
firstSlash = findSlashSlash(path.data(), path.size(), 0);
|
|
if (firstSlash != notFound)
|
|
mergeDoubleSlashes(path, firstSlash);
|
|
|
|
// FIXME: Should not do this in the query or anchor part.
|
|
firstSlash = findSlashDotSlash(path.data(), path.size(), 0);
|
|
if (firstSlash != notFound)
|
|
cleanSlashDotSlashes(path, firstSlash);
|
|
}
|
|
|
|
// Compares |c| against |lowercaseLetter| after setting bit 0x20 in |c|, which maps an
// ASCII uppercase letter onto its lowercase form. |lowercaseLetter| must be lowercase.
template <typename CharacterType>
static inline bool matchLetter(CharacterType c, char lowercaseLetter)
{
    const auto folded = c | 0x20;
    return folded == lowercaseLetter;
}
|
|
|
|
// Returns true when the characters start with "http:" or "https:" (letters matched
// case-insensitively) and, after an optional leading "//", contain no further '/'.
// Inputs shorter than six characters always yield false.
template <typename CharacterType>
static inline bool needsTrailingSlash(const CharacterType* characters, unsigned length)
{
    if (length < 6)
        return false;

    const bool startsWithHTTP = matchLetter(characters[0], 'h')
        && matchLetter(characters[1], 't')
        && matchLetter(characters[2], 't')
        && matchLetter(characters[3], 'p');
    if (!startsWithHTTP)
        return false;

    // Position just past the scheme's ':'.
    unsigned pos;
    if (characters[4] == ':')
        pos = 5;
    else if (matchLetter(characters[4], 's') && characters[5] == ':')
        pos = 6;
    else
        return false;

    // Skip initial two slashes if present.
    if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
        pos += 2;

    // Any remaining slash means no trailing slash is needed.
    for (; pos < length; ++pos) {
        if (characters[pos] == '/')
            return false;
    }
    return true;
}
|
|
|
|
template <typename CharacterType>
|
|
static ALWAYS_INLINE SharedStringHash computeSharedStringHashInline(const CharacterType* url, unsigned length)
|
|
{
|
|
return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length));
|
|
}
|
|
|
|
// Hashes the contents of |url|, reading it as 16-bit characters only when it is
// non-empty and not stored as 8-bit; otherwise the 8-bit characters are hashed.
SharedStringHash computeSharedStringHash(const String& url)
{
    const unsigned length = url.length();
    const bool use16BitCharacters = length && !url.is8Bit();
    if (use16BitCharacters)
        return computeSharedStringHashInline(url.characters16(), length);
    return computeSharedStringHashInline(url.characters8(), length);
}
|
|
|
|
// Hashes |length| 16-bit characters starting at |url|. No URL completion or
// path cleanup is applied; the characters are hashed as given.
SharedStringHash computeSharedStringHash(const UChar* url, unsigned length)
{
    const SharedStringHash hash = computeSharedStringHashInline(url, length);
    return hash;
}
|
|
|
|
template <typename CharacterType>
|
|
static ALWAYS_INLINE void computeSharedStringHashInline(const URL& base, const CharacterType* characters, unsigned length, Vector<CharacterType, 512>& buffer)
|
|
{
|
|
if (!length)
|
|
return;
|
|
|
|
// This is a poor man's completeURL. Faster with less memory allocation.
|
|
// FIXME: It's missing a lot of what completeURL does and a lot of what URL does.
|
|
// For example, it does not handle international domain names properly.
|
|
|
|
// FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
|
|
// 1) The "://" could be in the query or anchor.
|
|
// 2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
|
|
|
|
// FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
|
|
// have a query or anchor.
|
|
|
|
bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
|
|
|
|
if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) {
|
|
buffer.append(characters, length);
|
|
return;
|
|
}
|
|
|
|
|
|
if (hasColonSlashSlash) {
|
|
// FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
|
|
// end of the path, *before* the query or anchor.
|
|
buffer.append(characters, length);
|
|
buffer.append('/');
|
|
return;
|
|
}
|
|
|
|
if (!length)
|
|
append(buffer, base.string());
|
|
else {
|
|
switch (characters[0]) {
|
|
case '/':
|
|
append(buffer, StringView(base.string()).substring(0, base.pathStart()));
|
|
break;
|
|
case '#':
|
|
append(buffer, StringView(base.string()).substring(0, base.pathEnd()));
|
|
break;
|
|
default:
|
|
append(buffer, StringView(base.string()).substring(0, base.pathAfterLastSlash()));
|
|
break;
|
|
}
|
|
}
|
|
buffer.append(characters, length);
|
|
cleanPath(buffer);
|
|
if (needsTrailingSlash(buffer.data(), buffer.size())) {
|
|
// FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
|
|
// end of the path, *before* the query or anchor.
|
|
buffer.append('/');
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
// Computes the visited-link hash for |attributeURL| resolved against |base|.
// Returns 0 when |attributeURL| is empty or when the completed URL buffer ends up empty.
SharedStringHash computeVisitedLinkHash(const URL& base, const AtomString& attributeURL)
{
    if (attributeURL.isEmpty())
        return 0;

    const auto& baseString = base.string();
    const bool canStayIn8Bit = !baseString.isEmpty() && baseString.is8Bit() && attributeURL.is8Bit();

    if (!canStayIn8Bit) {
        // At least one side is not a non-empty 8-bit string: work in 16-bit characters.
        // The upconverted object must stay alive while its character pointer is in use.
        auto upconvertedCharacters = StringView(attributeURL.string()).upconvertedCharacters();
        const UChar* wideCharacters = upconvertedCharacters;

        Vector<UChar, 512> completedURL;
        computeSharedStringHashInline(base, wideCharacters, attributeURL.length(), completedURL);
        if (completedURL.isEmpty())
            return 0;

        return computeSharedStringHashInline(completedURL.data(), completedURL.size());
    }

    // Both the base and the attribute are 8-bit, so no upconversion is needed.
    Vector<LChar, 512> completedURL;
    computeSharedStringHashInline(base, attributeURL.characters8(), attributeURL.length(), completedURL);
    if (completedURL.isEmpty())
        return 0;

    return computeSharedStringHashInline(completedURL.data(), completedURL.size());
}
|
|
|
|
} // namespace WebCore
|