/*
 * Copyright (C) 2016-2018 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

// NOTE(review): this copy of the header appears to have lost every
// angle-bracketed span (include operands, template parameter lists and
// template argument lists) — see e.g. the bare `#include` lines,
// `template class CodePointIterator`, `Vector>` and `std::array;` below.
// The declarations are reproduced exactly as found; restore the missing
// spans from the upstream file before attempting to compile.

#pragma once

// NOTE(review): the four header names are missing from this text.
#include
#include
#include
#include

// Forward declaration; only used by reference in this header
// (see internationalDomainNameTranscoder()).
struct UIDNA;

namespace WTF {

// Forward declaration of the iterator type taken by most of the private
// parsing helpers below. Its definition is not visible in this header.
template class CodePointIterator;

// URLParser declares the state and helper routines used to build a URL from
// an input String, an optional base URL and an optional URLTextEncoding
// (see the private constructor). The constructor and result() are private
// and URL is a friend, so instances are only created from within URL or
// URLParser itself; other code is limited to the public static helpers.
// Only declarations (plus two one-line forwarding bodies) are visible here;
// behavior is defined in the implementation file.
class URLParser {
    WTF_MAKE_FAST_ALLOCATED;
public:
    // Bitmask formed by OR-ing six UIDNA_ERROR_* flags.
    // NOTE(review): presumably the set of name-to-ASCII conversion errors
    // that are tolerated rather than treated as failures — confirm against
    // the use site in the implementation file.
    constexpr static int allowedNameToASCIIErrors = UIDNA_ERROR_EMPTY_LABEL | UIDNA_ERROR_LABEL_TOO_LONG | UIDNA_ERROR_DOMAIN_NAME_TOO_LONG | UIDNA_ERROR_LEADING_HYPHEN | UIDNA_ERROR_TRAILING_HYPHEN | UIDNA_ERROR_HYPHEN_3_4;

    // Needs to be big enough to hold an IDN-encoded name.
    // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK.
    constexpr static size_t hostnameBufferLength = 2048;

    // Sentinel value produced by casting -1.
    // NOTE(review): the cast's target type is missing from this text; given
    // the macro name and the `const URLTextEncoding*` parameters below it is
    // presumably a URLTextEncoding pointer — confirm upstream. Being a
    // preprocessor macro, the name is not scoped to this class.
#define URLTextEncodingSentinelAllowingC0AtEndOfHash reinterpret_cast(-1)

    // Exported static predicates over one or two URL objects. Their exact
    // comparison/consistency rules live in the implementation file.
    WTF_EXPORT_PRIVATE static bool allValuesEqual(const URL&, const URL&);
    WTF_EXPORT_PRIVATE static bool internalValuesConsistent(const URL&);

    // Form-encoded data: parseURLEncodedForm() produces a URLEncodedForm from
    // a StringView and serialize() turns one back into a String.
    // NOTE(review): the Vector's element type is missing from this text.
    using URLEncodedForm = Vector>;
    WTF_EXPORT_PRIVATE static URLEncodedForm parseURLEncodedForm(StringView);
    WTF_EXPORT_PRIVATE static String serialize(const URLEncodedForm&);

    // Scheme helpers. NOTE(review): the std::optional value type of
    // maybeCanonicalizeScheme() is missing from this text.
    WTF_EXPORT_PRIVATE static bool isSpecialScheme(StringView);
    WTF_EXPORT_PRIVATE static std::optional maybeCanonicalizeScheme(StringView scheme);

    // Returns a reference to a UIDNA instance; ownership and lifetime are
    // not visible from this declaration.
    static const UIDNA& internationalDomainNameTranscoder();
    static bool isInUserInfoEncodeSet(UChar);

    // NOTE(review): the std::optional value type is missing from this text.
    static std::optional defaultPortForProtocol(StringView);

private:
    // Private construction: input string, base URL (defaults to `{ }`) and
    // text encoding (defaults to nullptr).
    URLParser(const String&, const URL& = { }, const URLTextEncoding* = nullptr);
    // Returns m_url by value.
    URL result() { return m_url; }

    friend class URL;

    // Parser state. All boolean flags start out false and m_inputBegin
    // starts out null.
    URL m_url;
    // NOTE(review): the Vector's element type is missing from this text.
    Vector m_asciiBuffer;
    bool m_urlIsSpecial { false };
    bool m_urlIsFile { false };
    bool m_hostHasPercentOrNonASCII { false };
    bool m_didSeeSyntaxViolation { false };
    String m_inputString;
    const void* m_inputBegin { nullptr };

    static constexpr size_t defaultInlineBufferSize = 2048;
    // NOTE(review): the Vector's template arguments are missing from this
    // text; presumably an LChar vector using defaultInlineBufferSize as its
    // inline capacity — confirm upstream.
    using LCharBuffer = Vector;

    // Parsing entry point and authority/host/port helpers.
    // NOTE(review): every `template` below has lost its parameter list, and
    // every CodePointIterator has lost its argument list.
    template void parse(const CharacterType*, const unsigned length, const URL&, const URLTextEncoding*);
    template void parseAuthority(CodePointIterator);
    template bool parseHostAndPort(CodePointIterator);
    template bool parsePort(CodePointIterator&);

    void failure();
    enum class ReportSyntaxViolation { No, Yes };
    // Convenience overload: forwards to the two-argument advance(), passing
    // the same iterator as the syntax-violation position.
    template void advance(CodePointIterator& iterator) { advance(iterator, iterator); }
    template void advance(CodePointIterator&, const CodePointIterator& iteratorForSyntaxViolationPosition);
    template bool takesTwoAdvancesUntilEnd(CodePointIterator);
    template void syntaxViolation(const CodePointIterator&);

    // Predicates that take the iterator by value, except
    // checkLocalhostCodePoint(), which takes it by non-const reference.
    template bool isPercentEncodedDot(CodePointIterator);
    template bool isWindowsDriveLetter(CodePointIterator);
    template bool isSingleDotPathSegment(CodePointIterator);
    template bool isDoubleDotPathSegment(CodePointIterator);
    template bool shouldCopyFileURL(CodePointIterator);
    template bool checkLocalhostCodePoint(CodePointIterator&, UChar32);
    template bool isAtLocalhost(CodePointIterator);
    bool isLocalhost(StringView);

    // Helpers that take the iterator by non-const reference.
    template void consumeSingleDotPathSegment(CodePointIterator&);
    template void consumeDoubleDotPathSegment(CodePointIterator&);
    template void appendWindowsDriveLetter(CodePointIterator&);
    template size_t currentPosition(const CodePointIterator&);

    // Encoding / decoding helpers.
    // NOTE(review): the std::optional value types of domainToASCII() and
    // formURLDecode() are missing from this text.
    template void appendNumberToASCIIBuffer(UnsignedIntegerType);
    template void utf8PercentEncode(const CodePointIterator&);
    template void utf8QueryEncode(const CodePointIterator&);
    template std::optional domainToASCII(StringImpl&, const CodePointIterator& iteratorForSyntaxViolationPosition);
    template LCharBuffer percentDecode(const LChar*, size_t, const CodePointIterator& iteratorForSyntaxViolationPosition);
    static LCharBuffer percentDecode(const LChar*, size_t);
    static std::optional formURLDecode(StringView input);
    static bool hasForbiddenHostCodePoint(const LCharBuffer&);
    void percentEncodeByte(uint8_t);
    void appendToASCIIBuffer(UChar32);
    void appendToASCIIBuffer(const char*, size_t);
    // LChar overload: reinterprets the pointer and forwards to another
    // appendToASCIIBuffer() overload.
    // NOTE(review): the cast's target type is missing from this text;
    // presumably `const char*`, matching the overload above — confirm.
    void appendToASCIIBuffer(const LChar* characters, size_t size) { appendToASCIIBuffer(reinterpret_cast(characters), size); }
    // NOTE(review): the element type of `source` is missing from this text.
    template void encodeNonUTF8Query(const Vector& source, const URLTextEncoding&, CodePointIterator);
    void copyASCIIStringUntil(const String&, size_t length);
    bool copyBaseWindowsDriveLetter(const URL&);
    // Two overloads: a (start, length) range yields a StringView, a single
    // position yields one UChar.
    StringView parsedDataView(size_t start, size_t length);
    UChar parsedDataView(size_t position);
    template bool subdomainStartsWithXNDashDash(CodePointIterator);
    bool subdomainStartsWithXNDashDash(StringImpl&);

    bool needsNonSpecialDotSlash() const;
    void addNonSpecialDotSlash();

    // IPv4 support. An address is a single 32-bit unsigned value. The two
    // error enums are declared here without enumerators; their definitions
    // are not visible in this header.
    // NOTE(review): the Expected template arguments are missing from this
    // text.
    using IPv4Address = uint32_t;
    void serializeIPv4(IPv4Address);
    enum class IPv4ParsingError;
    enum class IPv4PieceParsingError;
    template Expected parseIPv4Host(const CodePointIterator&, CodePointIterator);
    template Expected parseIPv4Piece(CodePointIterator&, bool& syntaxViolation);

    // IPv6 support.
    // NOTE(review): the std::array element type and size, and the
    // std::optional value types, are missing from this text;
    // serializeIPv6Piece() takes a uint16_t, so the array presumably holds
    // uint16_t pieces — confirm upstream.
    using IPv6Address = std::array;
    template std::optional parseIPv6Host(CodePointIterator);
    template std::optional parseIPv4PieceInsideIPv6(CodePointIterator&);
    template std::optional parseIPv4AddressInsideIPv6(CodePointIterator);
    void serializeIPv6Piece(uint16_t piece);
    void serializeIPv6(IPv6Address);

    // URLPart is declared here without enumerators; its definition is not
    // visible in this header.
    enum class URLPart;
    // Note that the encoding pointer is passed by reference
    // (`const URLTextEncoding*&`), so the callee is able to reseat it.
    template void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator&, const URLTextEncoding*&);
    static size_t urlLengthUntilPart(const URL&, URLPart);
    void popPath();
    bool shouldPopPath(unsigned);
};

}