/*
 * Copyright (C) 2008-2017 Apple Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
#include "config.h"
|
|
#include "ApplicationCacheManifestParser.h"
|
|
|
|
#include "ParsingUtilities.h"
|
|
#include "TextResourceDecoder.h"
|
|
#include <wtf/text/StringHash.h>
|
|
#include <wtf/text/StringParsingBuffer.h>
|
|
#include <wtf/text/StringView.h>
|
|
|
|
namespace WebCore {
|
|
|
|
// Section of the manifest the parser is currently reading. The parser starts in
// Explicit and switches whenever it meets a line ending in ':'.
enum class ApplicationCacheParserMode {
    Explicit, // Entries are added to the explicit URL set ("CACHE:" section, also the initial mode).
    Fallback, // Entries are namespace/fallback URL pairs ("FALLBACK:" section).
    OnlineAllowlist, // Entries are online-allowed URLs or the '*' wildcard ("NETWORK:" section).
    Unknown // Unrecognized section header; entries are ignored.
};
|
|
|
|
// Returns the leading portion of the manifest URL's path, up to and including
// its last '/'. The returned view is derived from manifestURL.path().
static StringView manifestPath(const URL& manifestURL)
{
    auto path = manifestURL.path();
    ASSERT(path[0] == '/');

    // Drop everything after the final '/', keeping the slash itself.
    path = path.substring(0, path.reverseFind('/') + 1);

    // Together with the assertion above, this checks that the result also ends with '/'.
    ASSERT(path[0] == path[path.length() - 1]);
    return path;
}
|
|
|
|
// Returns true if the character is a space or a horizontal tab, i.e. whitespace
// that does not terminate a manifest line.
template<typename CharacterType> static constexpr bool isManifestWhitespace(CharacterType character)
{
    return character == ' ' || character == '\t';
}
|
|
|
|
// Returns true if the character is a line feed or a carriage return, the two
// characters treated as line terminators by the parser.
template<typename CharacterType> static constexpr bool isManifestNewline(CharacterType character)
{
    return character == '\n' || character == '\r';
}
|
|
|
|
// Returns true if the character is a space, tab, line feed or carriage return.
template<typename CharacterType> static constexpr bool isManifestWhitespaceOrNewline(CharacterType character)
{
    return isManifestWhitespace(character) || isManifestNewline(character);
}
|
|
|
|
// Builds a URL from the characters in [start, end), using manifestURL as the
// base, and strips any fragment identifier from the result. The returned URL
// may be invalid; callers check isValid().
template<typename CharacterType> static URL makeManifestURL(const URL& manifestURL, const CharacterType* start, const CharacterType* end)
{
    URL url(manifestURL, String(start, end - start));
    url.removeFragmentIdentifier();
    return url;
}
|
|
|
|
// Literal tokens recognized by the parser, available for whichever character
// type the manifest string is stored in. None of them is null-terminated, so
// the array length equals the token length.

// Signature that must open every manifest.
template<typename CharacterType> static constexpr CharacterType cacheManifestIdentifier[] = { 'C', 'A', 'C', 'H', 'E', ' ', 'M', 'A', 'N', 'I', 'F', 'E', 'S', 'T' };
// Section header names (without the trailing ':').
template<typename CharacterType> static constexpr CharacterType cacheModeIdentifier[] = { 'C', 'A', 'C', 'H', 'E' };
template<typename CharacterType> static constexpr CharacterType fallbackModeIdentifier[] = { 'F', 'A', 'L', 'L', 'B', 'A', 'C', 'K' };
template<typename CharacterType> static constexpr CharacterType networkModeIdentifier[] = { 'N', 'E', 'T', 'W', 'O', 'R', 'K' };
|
|
|
|
std::optional<ApplicationCacheManifest> parseApplicationCacheManifest(const URL& manifestURL, const String& manifestMIMEType, const uint8_t* data, int length)
|
|
{
|
|
static constexpr const char cacheManifestMIMEType[] = "text/cache-manifest";
|
|
bool allowFallbackNamespaceOutsideManifestPath = equalLettersIgnoringASCIICase(manifestMIMEType, cacheManifestMIMEType);
|
|
auto manifestPath = WebCore::manifestPath(manifestURL);
|
|
|
|
auto manifestString = TextResourceDecoder::create(ASCIILiteral::fromLiteralUnsafe(cacheManifestMIMEType), "UTF-8")->decodeAndFlush(data, length);
|
|
|
|
return readCharactersForParsing(manifestString, [&](auto buffer) -> std::optional<ApplicationCacheManifest> {
|
|
using CharacterType = typename decltype(buffer)::CharacterType;
|
|
|
|
ApplicationCacheManifest manifest;
|
|
auto mode = ApplicationCacheParserMode::Explicit;
|
|
|
|
// Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder).
|
|
// Example: "CACHE MANIFEST #comment" is a valid signature.
|
|
// Example: "CACHE MANIFEST;V2" is not.
|
|
if (!skipCharactersExactly(buffer, cacheManifestIdentifier<CharacterType>))
|
|
return std::nullopt;
|
|
|
|
if (buffer.hasCharactersRemaining() && !isManifestWhitespaceOrNewline(*buffer))
|
|
return std::nullopt;
|
|
|
|
// Skip to the end of the line.
|
|
skipUntil<isManifestNewline>(buffer);
|
|
|
|
while (1) {
|
|
// Skip whitespace
|
|
skipWhile<isManifestWhitespaceOrNewline>(buffer);
|
|
|
|
if (buffer.atEnd())
|
|
break;
|
|
|
|
auto lineStart = buffer.position();
|
|
|
|
// Find the end of the line
|
|
skipUntil<isManifestNewline>(buffer);
|
|
|
|
// Line is a comment, skip to the next line.
|
|
if (*lineStart == '#')
|
|
continue;
|
|
|
|
// Get rid of trailing whitespace
|
|
auto lineEnd = buffer.position() - 1;
|
|
while (lineEnd > lineStart && isManifestWhitespace(*lineEnd))
|
|
--lineEnd;
|
|
|
|
auto lineBuffer = StringParsingBuffer { lineStart, lineEnd + 1 };
|
|
|
|
if (lineBuffer[lineBuffer.lengthRemaining() - 1] == ':') {
|
|
if (skipCharactersExactly(lineBuffer, cacheModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) {
|
|
mode = ApplicationCacheParserMode::Explicit;
|
|
continue;
|
|
}
|
|
if (skipCharactersExactly(lineBuffer, fallbackModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) {
|
|
mode = ApplicationCacheParserMode::Fallback;
|
|
continue;
|
|
}
|
|
if (skipCharactersExactly(lineBuffer, networkModeIdentifier<CharacterType>) && lineBuffer.lengthRemaining() == 1) {
|
|
mode = ApplicationCacheParserMode::OnlineAllowlist;
|
|
continue;
|
|
}
|
|
|
|
// If the line (excluding the trailing whitespace) ends with a ':' and isn't one of the known mode
|
|
// headers, transition to the 'Unknown' mode.
|
|
mode = ApplicationCacheParserMode::Unknown;
|
|
continue;
|
|
}
|
|
|
|
switch (mode) {
|
|
case ApplicationCacheParserMode::Unknown:
|
|
continue;
|
|
|
|
case ApplicationCacheParserMode::Explicit: {
|
|
// Look for whitespace separating the URL from subsequent ignored tokens.
|
|
skipUntil<isManifestWhitespace>(lineBuffer);
|
|
|
|
auto url = makeManifestURL(manifestURL, lineStart, lineBuffer.position());
|
|
if (!url.isValid())
|
|
continue;
|
|
|
|
if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol()))
|
|
continue;
|
|
|
|
if (manifestURL.protocolIs("https") && !protocolHostAndPortAreEqual(manifestURL, url))
|
|
continue;
|
|
|
|
manifest.explicitURLs.add(url.string());
|
|
continue;
|
|
}
|
|
|
|
case ApplicationCacheParserMode::OnlineAllowlist: {
|
|
// Look for whitespace separating the URL from subsequent ignored tokens.
|
|
skipUntil<isManifestWhitespace>(lineBuffer);
|
|
|
|
if (lineBuffer.position() - lineStart == 1 && *lineStart == '*') {
|
|
// Wildcard was found.
|
|
manifest.allowAllNetworkRequests = true;
|
|
continue;
|
|
}
|
|
|
|
auto url = makeManifestURL(manifestURL, lineStart, lineBuffer.position());
|
|
if (!url.isValid())
|
|
continue;
|
|
|
|
if (!equalIgnoringASCIICase(url.protocol(), manifestURL.protocol()))
|
|
continue;
|
|
|
|
manifest.onlineAllowedURLs.append(url);
|
|
continue;
|
|
}
|
|
|
|
case ApplicationCacheParserMode::Fallback: {
|
|
// Look for whitespace separating the two URLs
|
|
skipUntil<isManifestWhitespace>(lineBuffer);
|
|
|
|
if (lineBuffer.atEnd()) {
|
|
// There was no whitespace separating the URLs.
|
|
continue;
|
|
}
|
|
|
|
auto namespaceURL = makeManifestURL(manifestURL, lineStart, lineBuffer.position());
|
|
if (!namespaceURL.isValid())
|
|
continue;
|
|
|
|
if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL))
|
|
continue;
|
|
|
|
// Although <https://html.spec.whatwg.org/multipage/offline.html#parsing-cache-manifests> (07/06/2017) saids
|
|
// that we should always prefix match the manifest path we only do so if the manifest was served with a non-
|
|
// standard HTTP Content-Type header for web compatibility.
|
|
if (!allowFallbackNamespaceOutsideManifestPath && !namespaceURL.path().startsWith(manifestPath))
|
|
continue;
|
|
|
|
// Skip whitespace separating fallback namespace from URL.
|
|
skipWhile<isManifestWhitespace>(lineBuffer);
|
|
|
|
auto fallbackStart = lineBuffer.position();
|
|
|
|
// Look for whitespace separating the URL from subsequent ignored tokens.
|
|
skipUntil<isManifestWhitespace>(lineBuffer);
|
|
|
|
auto fallbackURL = makeManifestURL(manifestURL, fallbackStart, lineBuffer.position());
|
|
if (!fallbackURL.isValid())
|
|
continue;
|
|
|
|
if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL))
|
|
continue;
|
|
|
|
manifest.fallbackURLs.append(std::make_pair(namespaceURL, fallbackURL));
|
|
continue;
|
|
}
|
|
}
|
|
|
|
ASSERT_NOT_REACHED();
|
|
}
|
|
|
|
return manifest;
|
|
});
|
|
}
|
|
|
|
}
|