/* * Copyright (C) 2004-2020 Apple Inc. All rights reserved. * Copyright (C) 2012 Research In Motion Limited. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include #include "URLParser.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace WTF { void URL::invalidate() { m_isValid = false; m_protocolIsInHTTPFamily = false; m_cannotBeABaseURL = false; m_schemeEnd = 0; m_userStart = 0; m_userEnd = 0; m_passwordEnd = 0; m_hostEnd = 0; m_portLength = 0; m_pathEnd = 0; m_pathAfterLastSlash = 0; m_queryEnd = 0; } URL::URL(const URL& base, const String& relative, const URLTextEncoding* encoding) { *this = URLParser(relative, base, encoding).result(); } static bool shouldTrimFromURL(UChar character) { // Ignore leading/trailing whitespace and control characters. return character <= ' '; } URL URL::isolatedCopy() const { URL result = *this; result.m_string = result.m_string.isolatedCopy(); return result; } StringView URL::lastPathComponent() const { if (!hasPath()) return { }; unsigned end = m_pathEnd - 1; if (m_string[end] == '/') --end; size_t start = m_string.reverseFind('/', end); if (start < pathStart()) return { }; ++start; return StringView(m_string).substring(start, end - start + 1); } bool URL::hasSpecialScheme() const { // https://url.spec.whatwg.org/#special-scheme return protocolIs("ftp") || protocolIs("file") || protocolIs("http") || protocolIs("https") || protocolIs("ws") || protocolIs("wss"); } unsigned URL::pathStart() const { unsigned start = m_hostEnd + m_portLength; if (start == m_schemeEnd + 1U && start + 1 < m_string.length() && m_string[start] == '/' && m_string[start + 1] == '.') start += 2; return start; } StringView URL::protocol() const { if (!m_isValid) return { }; return StringView(m_string).substring(0, m_schemeEnd); } StringView URL::host() const { if (!m_isValid) return { }; unsigned start = hostStart(); return StringView(m_string).substring(start, m_hostEnd - start); } std::optional URL::port() const { return m_portLength ? parseInteger(StringView(m_string).substring(m_hostEnd + 1, m_portLength - 1)) : std::nullopt; } String URL::hostAndPort() const { if (auto port = this->port()) return makeString(host(), ':', port.value()); return host().toString(); } String URL::protocolHostAndPort() const { if (!hasCredentials()) return m_string.substring(0, pathStart()); return makeString( StringView(m_string).substring(0, m_userStart), StringView(m_string).substring(hostStart(), pathStart() - hostStart()) ); } static std::optional decodeEscapeSequence(StringView input, unsigned index, unsigned length) { if (index + 3 > length || input[index] != '%') return std::nullopt; auto digit1 = input[index + 1]; auto digit2 = input[index + 2]; if (!isASCIIHexDigit(digit1) || !isASCIIHexDigit(digit2)) return std::nullopt; return toASCIIHexValue(digit1, digit2); } static String decodeEscapeSequencesFromParsedURL(StringView input) { ASSERT(input.isAllASCII()); auto length = input.length(); if (length < 3 || !input.contains('%')) return input.toString(); // FIXME: This 100 is arbitrary. Should make a histogram of how this function is actually used to choose a better value. Vector percentDecoded; percentDecoded.reserveInitialCapacity(length); for (unsigned i = 0; i < length; ) { if (auto decodedCharacter = decodeEscapeSequence(input, i, length)) { percentDecoded.uncheckedAppend(*decodedCharacter); i += 3; } else { percentDecoded.uncheckedAppend(input[i]); ++i; } } // FIXME: Is UTF-8 always the correct encoding? // FIXME: This returns a null string when we encounter an invalid UTF-8 sequence. Is that OK? return String::fromUTF8(percentDecoded.data(), percentDecoded.size()); } String URL::user() const { return decodeEscapeSequencesFromParsedURL(encodedUser()); } String URL::password() const { return decodeEscapeSequencesFromParsedURL(encodedPassword()); } StringView URL::encodedUser() const { return StringView(m_string).substring(m_userStart, m_userEnd - m_userStart); } StringView URL::encodedPassword() const { if (m_passwordEnd == m_userEnd) return { }; return StringView(m_string).substring(m_userEnd + 1, m_passwordEnd - m_userEnd - 1); } StringView URL::fragmentIdentifier() const { if (!hasFragmentIdentifier()) return { }; return StringView(m_string).substring(m_queryEnd + 1); } URL URL::truncatedForUseAsBase() const { return URL(URL(), m_string.left(m_pathAfterLastSlash)); } #if !USE(CF) String URL::fileSystemPath() const { if (!isLocalFile()) return { }; auto result = decodeEscapeSequencesFromParsedURL(path()); #if PLATFORM(WIN) result = fileSystemRepresentation(result); #endif return result; } #endif #if !ASSERT_ENABLED static inline void assertProtocolIsGood(StringView) { } #else static void assertProtocolIsGood(StringView protocol) { // FIXME: We probably don't need this function any more. // The isASCIIAlphaCaselessEqual function asserts that passed-in characters // are ones it can handle; the older code did not and relied on these checks. for (auto character : protocol.codeUnits()) { ASSERT(isASCII(character)); ASSERT(character > ' '); ASSERT(!isASCIIUpper(character)); ASSERT(toASCIILowerUnchecked(character) == character); } } #endif static Lock defaultPortForProtocolMapForTestingLock; using DefaultPortForProtocolMapForTesting = HashMap; static DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMapForTesting() WTF_REQUIRES_LOCK(defaultPortForProtocolMapForTestingLock) { static DefaultPortForProtocolMapForTesting* defaultPortForProtocolMap; return defaultPortForProtocolMap; } static DefaultPortForProtocolMapForTesting& ensureDefaultPortForProtocolMapForTesting() WTF_REQUIRES_LOCK(defaultPortForProtocolMapForTestingLock) { DefaultPortForProtocolMapForTesting*& defaultPortForProtocolMap = defaultPortForProtocolMapForTesting(); if (!defaultPortForProtocolMap) defaultPortForProtocolMap = new DefaultPortForProtocolMapForTesting; return *defaultPortForProtocolMap; } void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol) { Locker locker { defaultPortForProtocolMapForTestingLock }; ensureDefaultPortForProtocolMapForTesting().add(protocol, port); } void clearDefaultPortForProtocolMapForTesting() { Locker locker { defaultPortForProtocolMapForTestingLock }; if (auto* map = defaultPortForProtocolMapForTesting()) map->clear(); } std::optional defaultPortForProtocol(StringView protocol) { { Locker locker { defaultPortForProtocolMapForTestingLock }; if (auto* overrideMap = defaultPortForProtocolMapForTesting()) { auto iterator = overrideMap->find(protocol.toStringWithoutCopying()); if (iterator != overrideMap->end()) return iterator->value; } } return URLParser::defaultPortForProtocol(protocol); } bool isDefaultPortForProtocol(uint16_t port, StringView protocol) { return defaultPortForProtocol(protocol) == port; } bool URL::protocolIsJavaScript() const { return WTF::protocolIsJavaScript(string()); } bool URL::protocolIs(const char* protocol) const { assertProtocolIsGood(protocol); // JavaScript URLs are "valid" and should be executed even if URL decides they are invalid. // The free function protocolIsJavaScript() should be used instead. ASSERT(!equalLettersIgnoringASCIICase(StringView(protocol), "javascript")); if (!m_isValid) return false; // Do the comparison without making a new string object. for (unsigned i = 0; i < m_schemeEnd; ++i) { if (!protocol[i] || !isASCIIAlphaCaselessEqual(m_string[i], protocol[i])) return false; } return !protocol[m_schemeEnd]; // We should have consumed all characters in the argument. } bool URL::protocolIs(StringView protocol) const { assertProtocolIsGood(protocol); if (!m_isValid) return false; if (m_schemeEnd != protocol.length()) return false; // Do the comparison without making a new string object. for (unsigned i = 0; i < m_schemeEnd; ++i) { if (!isASCIIAlphaCaselessEqual(m_string[i], protocol[i])) return false; } return true; } StringView URL::query() const { if (m_queryEnd == m_pathEnd) return { }; return StringView(m_string).substring(m_pathEnd + 1, m_queryEnd - (m_pathEnd + 1)); } StringView URL::path() const { if (!m_isValid) return { }; return StringView(m_string).substring(pathStart(), m_pathEnd - pathStart()); } bool URL::setProtocol(StringView newProtocol) { // Firefox and IE remove everything after the first ':'. auto newProtocolPrefix = newProtocol.substring(0, newProtocol.find(':')); auto newProtocolCanonicalized = URLParser::maybeCanonicalizeScheme(newProtocolPrefix); if (!newProtocolCanonicalized) return false; if (!m_isValid) { parse(makeString(*newProtocolCanonicalized, ':', m_string)); return true; } if ((m_passwordEnd != m_userStart || port()) && *newProtocolCanonicalized == "file") return true; if (isLocalFile() && host().isEmpty()) return true; parse(makeString(*newProtocolCanonicalized, StringView(m_string).substring(m_schemeEnd))); return true; } // Appends the punycoded hostname identified by the given string and length to // the output buffer. The result will not be null terminated. // Return value of false means error in encoding. static bool appendEncodedHostname(Vector& buffer, StringView string) { // hostnameBuffer needs to be big enough to hold an IDN-encoded name. // For host names bigger than this, we won't do IDN encoding, which is almost certainly OK. if (string.length() > URLParser::hostnameBufferLength || string.isAllASCII()) { append(buffer, string); return true; } UChar hostnameBuffer[URLParser::hostnameBufferLength]; UErrorCode error = U_ZERO_ERROR; UIDNAInfo processingDetails = UIDNA_INFO_INITIALIZER; int32_t numCharactersConverted = uidna_nameToASCII(&URLParser::internationalDomainNameTranscoder(), string.upconvertedCharacters(), string.length(), hostnameBuffer, URLParser::hostnameBufferLength, &processingDetails, &error); if (U_SUCCESS(error) && !(processingDetails.errors & ~URLParser::allowedNameToASCIIErrors) && numCharactersConverted) { buffer.append(hostnameBuffer, numCharactersConverted); return true; } return false; } unsigned URL::hostStart() const { return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1; } unsigned URL::credentialsEnd() const { // Include '@' too if we have it. unsigned end = m_passwordEnd; if (end != m_hostEnd && m_string[end] == '@') end += 1; return end; } static bool forwardSlashHashOrQuestionMark(UChar c) { return c == '/' || c == '#' || c == '?'; } static bool slashHashOrQuestionMark(UChar c) { return forwardSlashHashOrQuestionMark(c) || c == '\\'; } void URL::setHost(StringView newHost) { if (!m_isValid) return; if (newHost.contains(':') && !newHost.startsWith('[')) return; if (auto index = newHost.find(hasSpecialScheme() ? slashHashOrQuestionMark : forwardSlashHashOrQuestionMark); index != notFound) newHost = newHost.substring(0, index); Vector encodedHostName; if (hasSpecialScheme() && !appendEncodedHostname(encodedHostName, newHost)) return; bool slashSlashNeeded = m_userStart == m_schemeEnd + 1U; parse(makeString( StringView(m_string).left(hostStart()), slashSlashNeeded ? "//" : "", hasSpecialScheme() ? StringView(encodedHostName.data(), encodedHostName.size()) : newHost, StringView(m_string).substring(m_hostEnd) )); } void URL::setPort(std::optional port) { if (!m_isValid) return; if (!port) { remove(m_hostEnd, m_portLength); return; } parse(makeString( StringView(m_string).left(m_hostEnd), ':', static_cast(*port), StringView(m_string).substring(pathStart()) )); } void URL::setHostAndPort(StringView hostAndPort) { if (!m_isValid) return; auto hostName = hostAndPort; StringView portString; auto colonIndex = hostName.reverseFind(':'); if (colonIndex != notFound) { portString = hostName.substring(colonIndex + 1); hostName = hostName.substring(0, colonIndex); // Multiple colons are acceptable only in case of IPv6. if (hostName.contains(':') && !hostName.startsWith('[')) return; if (!parseInteger(portString)) portString = { }; } if (hostName.isEmpty()) { remove(hostStart(), pathStart() - hostStart()); return; } Vector encodedHostName; if (hasSpecialScheme() && !appendEncodedHostname(encodedHostName, hostName)) return; bool slashSlashNeeded = m_userStart == m_schemeEnd + 1U; parse(makeString( StringView(m_string).left(hostStart()), slashSlashNeeded ? "//" : "", hasSpecialScheme() ? StringView(encodedHostName.data(), encodedHostName.size()) : hostName, portString.isEmpty() ? "" : ":", portString, StringView(m_string).substring(pathStart()) )); } template static String percentEncodeCharacters(const StringType& input, bool(*shouldEncode)(UChar)) { auto encode = [shouldEncode] (const StringType& input) { CString utf8 = input.utf8(); auto* data = utf8.data(); StringBuilder builder; auto length = utf8.length(); for (unsigned j = 0; j < length; j++) { auto c = data[j]; if (shouldEncode(c)) { builder.append('%'); builder.append(upperNibbleToASCIIHexDigit(c)); builder.append(lowerNibbleToASCIIHexDigit(c)); } else builder.append(c); } return builder.toString(); }; for (size_t i = 0; i < input.length(); ++i) { if (UNLIKELY(shouldEncode(input[i]))) return encode(input); } if constexpr (std::is_same_v) return input.toString(); else return input; } void URL::parse(const String& string) { *this = URLParser(string).result(); } void URL::remove(unsigned start, unsigned length) { if (!length) return; ASSERT(start < m_string.length()); ASSERT(length <= m_string.length() - start); auto stringAfterRemoval = WTFMove(m_string); stringAfterRemoval.remove(start, length); parse(stringAfterRemoval); } void URL::setUser(StringView newUser) { if (!m_isValid) return; unsigned end = m_userEnd; if (!newUser.isEmpty()) { bool slashSlashNeeded = m_userStart == m_schemeEnd + 1U; bool needSeparator = end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@'); parse(makeString( StringView(m_string).left(m_userStart), slashSlashNeeded ? "//" : "", percentEncodeCharacters(newUser, URLParser::isInUserInfoEncodeSet), needSeparator ? "@" : "", StringView(m_string).substring(end) )); } else { // Remove '@' if we now have neither user nor password. if (m_userEnd == m_passwordEnd && end != m_hostEnd && m_string[end] == '@') end += 1; remove(m_userStart, end - m_userStart); } } void URL::setPassword(StringView newPassword) { if (!m_isValid) return; if (!newPassword.isEmpty()) { bool needLeadingSlashes = m_userEnd == m_schemeEnd + 1U; parse(makeString( StringView(m_string).left(m_userEnd), needLeadingSlashes ? "//:" : ":", percentEncodeCharacters(newPassword, URLParser::isInUserInfoEncodeSet), '@', StringView(m_string).substring(credentialsEnd()) )); } else { unsigned end = m_userStart == m_userEnd ? credentialsEnd() : m_passwordEnd; remove(m_userEnd, end - m_userEnd); } } void URL::removeCredentials() { if (!m_isValid) return; remove(m_userStart, credentialsEnd() - m_userStart); } void URL::setFragmentIdentifier(StringView identifier) { if (!m_isValid) return; *this = URLParser(makeString(StringView(m_string).left(m_queryEnd), '#', identifier), { }, URLTextEncodingSentinelAllowingC0AtEndOfHash).result(); } void URL::removeFragmentIdentifier() { if (!m_isValid) return; m_string = m_string.left(m_queryEnd); } void URL::removeQueryAndFragmentIdentifier() { if (!m_isValid) return; m_string = m_string.left(m_pathEnd); m_queryEnd = m_pathEnd; } void URL::setQuery(StringView newQuery) { // FIXME: Consider renaming this function to setEncodedQuery and/or calling percentEncodeCharacters the way setPath does. // https://webkit.org/b/161176 if (!m_isValid) return; parse(makeString( StringView(m_string).left(m_pathEnd), (!newQuery.startsWith('?') && !newQuery.isNull()) ? "?" : "", newQuery, StringView(m_string).substring(m_queryEnd) )); } static String escapePathWithoutCopying(StringView path) { auto questionMarkOrNumberSignOrNonASCII = [] (UChar character) { return character == '?' || character == '#' || !isASCII(character); }; return percentEncodeCharacters(path, questionMarkOrNumberSignOrNonASCII); } void URL::setPath(StringView path) { if (!m_isValid) return; parse(makeString( StringView(m_string).left(pathStart()), path.startsWith('/') || (path.startsWith('\\') && (hasSpecialScheme() || protocolIs("file"))) || (!hasSpecialScheme() && path.isEmpty() && m_schemeEnd + 1U < pathStart()) ? "" : "/", !hasSpecialScheme() && host().isEmpty() && path.startsWith("//") && path.length() > 2 ? "/." : "", escapePathWithoutCopying(path), StringView(m_string).substring(m_pathEnd) )); } StringView URL::stringWithoutQueryOrFragmentIdentifier() const { if (!m_isValid) return m_string; return StringView(m_string).left(pathEnd()); } StringView URL::stringWithoutFragmentIdentifier() const { if (!m_isValid) return m_string; return StringView(m_string).left(m_queryEnd); } bool equalIgnoringFragmentIdentifier(const URL& a, const URL& b) { return a.stringWithoutFragmentIdentifier() == b.stringWithoutFragmentIdentifier(); } bool protocolHostAndPortAreEqual(const URL& a, const URL& b) { if (a.m_schemeEnd != b.m_schemeEnd) return false; unsigned hostStartA = a.hostStart(); unsigned hostLengthA = a.m_hostEnd - hostStartA; unsigned hostStartB = b.hostStart(); unsigned hostLengthB = b.m_hostEnd - b.hostStart(); if (hostLengthA != hostLengthB) return false; // Check the scheme for (unsigned i = 0; i < a.m_schemeEnd; ++i) { if (a.string()[i] != b.string()[i]) return false; } // And the host for (unsigned i = 0; i < hostLengthA; ++i) { if (a.string()[hostStartA + i] != b.string()[hostStartB + i]) return false; } if (a.port() != b.port()) return false; return true; } bool URL::isMatchingDomain(StringView domain) const { // FIXME: Consider moving this to an appropriate place in WebCore's plug-in code; don't want people tempted to use this instead of SecurityOrigin. if (isNull()) return false; if (domain.isEmpty()) return true; if (!protocolIsInHTTPFamily()) return false; auto host = this->host(); if (!host.endsWith(domain)) return false; return host.length() == domain.length() || host[host.length() - domain.length() - 1] == '.'; } // FIXME: Rename this so it's clear that it does the appropriate escaping for URL query field values. String encodeWithURLEscapeSequences(const String& input) { return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet); } bool URL::isHierarchical() const { if (!m_isValid) return false; ASSERT(m_string[m_schemeEnd] == ':'); return m_string[m_schemeEnd + 1] == '/'; } static bool protocolIsInternal(StringView string, const char* protocol) { assertProtocolIsGood(protocol); bool isLeading = true; for (auto codeUnit : string.codeUnits()) { if (isLeading) { // Skip leading whitespace and control characters. if (shouldTrimFromURL(codeUnit)) continue; isLeading = false; } else { // Skip tabs and newlines even later in the protocol. if (codeUnit == '\t' || codeUnit == '\r' || codeUnit == '\n') continue; } char expectedCharacter = *protocol++; if (!expectedCharacter) return codeUnit == ':'; if (!isASCIIAlphaCaselessEqual(codeUnit, expectedCharacter)) return false; } return false; } bool protocolIs(StringView string, const char* protocol) { return protocolIsInternal(string, protocol); } #ifndef NDEBUG void URL::print() const { printf("%s\n", m_string.utf8().data()); } #endif void URL::dump(PrintStream& out) const { out.print(m_string); } String URL::strippedForUseAsReferrer() const { if (!m_isValid) return m_string; unsigned end = credentialsEnd(); if (m_userStart == end && m_queryEnd == m_string.length()) return m_string; return makeString( StringView(m_string).substring(0, m_userStart), StringView(m_string).substring(end, m_queryEnd - end) ); } bool URL::isLocalFile() const { // Including feed here might be a bad idea since drag and drop uses this check // and including feed would allow feeds to potentially let someone's blog // read the contents of the clipboard on a drag, even without a drop. // Likewise with using the FrameLoader::shouldTreatURLAsLocal() function. return protocolIs("file"); } bool protocolIsJavaScript(StringView string) { return protocolIsInternal(string, "javascript"); } bool protocolIsInHTTPFamily(StringView url) { auto length = url.length(); // Do the comparison without making a new string object. return length >= 5 && isASCIIAlphaCaselessEqual(url[0], 'h') && isASCIIAlphaCaselessEqual(url[1], 't') && isASCIIAlphaCaselessEqual(url[2], 't') && isASCIIAlphaCaselessEqual(url[3], 'p') && (url[4] == ':' || (isASCIIAlphaCaselessEqual(url[4], 's') && length >= 6 && url[5] == ':')); } static StaticStringImpl aboutBlankString { "about:blank" }; const URL& aboutBlankURL() { static LazyNeverDestroyed staticBlankURL; static std::once_flag onceFlag; std::call_once(onceFlag, [&] { staticBlankURL.construct(URL(), &aboutBlankString); }); return staticBlankURL; } static StaticStringImpl aboutSrcDocString { "about:srcdoc" }; const URL& aboutSrcDocURL() { static LazyNeverDestroyed staticSrcDocURL; static std::once_flag onceFlag; std::call_once(onceFlag, [&] { staticSrcDocURL.construct(URL(), &aboutSrcDocString); }); return staticSrcDocURL; } bool URL::protocolIsAbout() const { return protocolIs("about"); } bool portAllowed(const URL& url) { std::optional port = url.port(); // Since most URLs don't have a port, return early for the "no port" case. if (!port) return true; // This blocked port list matches the port blocking that Mozilla implements. // See http://www.mozilla.org/projects/netlib/PortBanning.html for more information. static const uint16_t blockedPortList[] = { 1, // tcpmux 7, // echo 9, // discard 11, // systat 13, // daytime 15, // netstat 17, // qotd 19, // chargen 20, // FTP-data 21, // FTP-control 22, // SSH 23, // telnet 25, // SMTP 37, // time 42, // name 43, // nicname 53, // domain 69, // TFTP 77, // priv-rjs 79, // finger 87, // ttylink 95, // supdup 101, // hostriame 102, // iso-tsap 103, // gppitnp 104, // acr-nema 109, // POP2 110, // POP3 111, // sunrpc 113, // auth 115, // SFTP 117, // uucp-path 119, // nntp 123, // NTP 135, // loc-srv / epmap 137, // NetBIOS 139, // netbios 143, // IMAP2 161, // SNMP 179, // BGP 389, // LDAP 427, // SLP (Also used by Apple Filing Protocol) 465, // SMTP+SSL 512, // print / exec 513, // login 514, // shell 515, // printer 526, // tempo 530, // courier 531, // Chat 532, // netnews 540, // UUCP 548, // afpovertcp [Apple addition] 554, // rtsp 556, // remotefs 563, // NNTP+SSL 587, // ESMTP 601, // syslog-conn 636, // LDAP+SSL 989, // ftps-data 990, // ftps 993, // IMAP+SSL 995, // POP3+SSL 1719, // H323 (RAS) 1720, // H323 (Q931) 1723, // H323 (H245) 2049, // NFS 3659, // apple-sasl / PasswordServer [Apple addition] 4045, // lockd 4190, // ManageSieve [Apple addition] 5060, // SIP 5061, // SIPS 6000, // X11 6566, // SANE 6665, // Alternate IRC [Apple addition] 6666, // Alternate IRC [Apple addition] 6667, // Standard IRC [Apple addition] 6668, // Alternate IRC [Apple addition] 6669, // Alternate IRC [Apple addition] 6679, // Alternate IRC SSL [Apple addition] 6697, // IRC+SSL [Apple addition] 10080, // amanda }; // If the port is not in the blocked port list, allow it. ASSERT(std::is_sorted(std::begin(blockedPortList), std::end(blockedPortList))); if (!std::binary_search(std::begin(blockedPortList), std::end(blockedPortList), port.value())) return true; // Allow ports 21 and 22 for FTP URLs, as Mozilla does. if ((port.value() == 21 || port.value() == 22) && url.protocolIs("ftp")) return true; // Allow any port number in a file URL, since the port number is ignored. if (url.protocolIs("file")) return true; return false; } String mimeTypeFromDataURL(StringView dataURL) { ASSERT(protocolIsInternal(dataURL, "data")); // FIXME: What's the right behavior when the URL has a comma first, but a semicolon later? // Currently this code will break at the semicolon in that case; should add a test. auto index = dataURL.find(';', 5); if (index == notFound) index = dataURL.find(',', 5); if (index == notFound) { // FIXME: There was an old comment here that made it sound like this should be returning text/plain. // But we have been returning empty string here for some time, so not changing its behavior at this time. return emptyString(); } if (index == 5) return "text/plain"_s; ASSERT(index >= 5); return dataURL.substring(5, index - 5).convertToASCIILowercase(); } String URL::stringCenterEllipsizedToLength(unsigned length) const { if (m_string.length() <= length) return m_string; return makeString(StringView(m_string).left(length / 2 - 1), "...", StringView(m_string).right(length / 2 - 2)); } URL URL::fakeURLWithRelativePart(StringView relativePart) { return URL(URL(), makeString("webkit-fake-url://", createCanonicalUUIDString(), '/', relativePart)); } URL URL::fileURLWithFileSystemPath(StringView path) { return URL(URL(), makeString( "file://", path.startsWith('/') ? "" : "/", escapePathWithoutCopying(path) )); } StringView URL::queryWithLeadingQuestionMark() const { if (m_queryEnd <= m_pathEnd) return { }; return StringView(m_string).substring(m_pathEnd, m_queryEnd - m_pathEnd); } StringView URL::fragmentIdentifierWithLeadingNumberSign() const { if (!m_isValid || m_string.length() <= m_queryEnd) return { }; return StringView(m_string).substring(m_queryEnd); } bool URL::isAboutBlank() const { return protocolIsAbout() && path() == "blank"; } bool URL::isAboutSrcDoc() const { return protocolIsAbout() && path() == "srcdoc"; } TextStream& operator<<(TextStream& ts, const URL& url) { ts << url.string(); return ts; } #if !PLATFORM(COCOA) && !USE(SOUP) static bool isIPv4Address(StringView string) { auto count = 0; for (const auto octet : string.splitAllowingEmptyEntries('.')) { if (count >= 4) return false; const auto length = octet.length(); if (!length || length > 3) return false; auto value = 0; for (auto i = 0u; i < length; ++i) { const auto digit = octet[i]; // Prohibit leading zeroes. if (digit > '9' || digit < (!i && length > 1 ? '1' : '0')) return false; value = 10 * value + (digit - '0'); } if (value > 255) return false; count++; } return (count == 4); } static bool isIPv6Address(StringView string) { enum SkipState { None, WillSkip, Skipping, Skipped, Final }; auto skipState = None; auto count = 0; for (const auto hextet : string.splitAllowingEmptyEntries(':')) { if (count >= 8 || skipState == Final) return false; const auto length = hextet.length(); if (!length) { // :: may be used anywhere to skip 1 to 8 hextets, but only once. if (skipState == Skipped) return false; if (skipState == None) skipState = !count ? WillSkip : Skipping; else if (skipState == WillSkip) skipState = Skipping; else skipState = Final; continue; } if (skipState == WillSkip) return false; if (skipState == Skipping) skipState = Skipped; if (length > 4) { // An IPv4 address may be used in place of the final two hextets. if ((skipState == None && count != 6) || (skipState == Skipped && count >= 6) || !isIPv4Address(hextet)) return false; skipState = Final; continue; } for (const auto codeUnit : hextet.codeUnits()) { // IPv6 allows leading zeroes. if (!isASCIIHexDigit(codeUnit)) return false; } count++; } return (count == 8 && skipState == None) || skipState == Skipped || skipState == Final; } bool URL::hostIsIPAddress(StringView host) { return host.contains(':') ? isIPv6Address(host) : isIPv4Address(host); } #endif Vector> differingQueryParameters(const URL& firstURL, const URL& secondURL) { auto firstQueryParameters = URLParser::parseURLEncodedForm(firstURL.query()); auto secondQueryParameters = URLParser::parseURLEncodedForm(secondURL.query()); if (firstQueryParameters.isEmpty()) return secondQueryParameters; if (secondQueryParameters.isEmpty()) return firstQueryParameters; auto compare = [] (const KeyValuePair& a, const KeyValuePair& b) { if (int result = codePointCompare(a.key, b.key)) return result; return codePointCompare(a.value, b.value); }; auto comparesLessThan = [compare] (const KeyValuePair& a, const KeyValuePair& b) { return compare(a, b) < 0; }; std::sort(firstQueryParameters.begin(), firstQueryParameters.end(), comparesLessThan); std::sort(secondQueryParameters.begin(), secondQueryParameters.end(), comparesLessThan); size_t totalFirstQueryParameters = firstQueryParameters.size(); size_t totalSecondQueryParameters = secondQueryParameters.size(); size_t indexInFirstQueryParameters = 0; size_t indexInSecondQueryParameters = 0; Vector> differingQueryParameters; while (indexInFirstQueryParameters < totalFirstQueryParameters && indexInSecondQueryParameters < totalSecondQueryParameters) { int comparison = compare(firstQueryParameters[indexInFirstQueryParameters], secondQueryParameters[indexInSecondQueryParameters]); if (comparison < 0) { differingQueryParameters.append(firstQueryParameters[indexInFirstQueryParameters]); indexInFirstQueryParameters++; } else if (comparison > 0) { differingQueryParameters.append(secondQueryParameters[indexInSecondQueryParameters]); indexInSecondQueryParameters++; } else { indexInFirstQueryParameters++; indexInSecondQueryParameters++; } } while (indexInFirstQueryParameters < totalFirstQueryParameters) { differingQueryParameters.append(firstQueryParameters[indexInFirstQueryParameters]); indexInFirstQueryParameters++; } while (indexInSecondQueryParameters < totalSecondQueryParameters) { differingQueryParameters.append(secondQueryParameters[indexInSecondQueryParameters]); indexInSecondQueryParameters++; } return differingQueryParameters; } static StringView substringIgnoringQueryAndFragments(const URL& url) { if (!url.isValid()) return StringView(url.string()); return StringView(url.string()).left(url.pathEnd()); } bool isEqualIgnoringQueryAndFragments(const URL& a, const URL& b) { return substringIgnoringQueryAndFragments(a) == substringIgnoringQueryAndFragments(b); } void removeQueryParameters(URL& url, const HashSet& keysToRemove) { if (keysToRemove.isEmpty()) return; StringBuilder queryWithoutRemovalKeys; for (auto& parameter : URLParser::parseURLEncodedForm(url.query())) { if (!keysToRemove.contains(parameter.key)) queryWithoutRemovalKeys.append(queryWithoutRemovalKeys.isEmpty() ? "" : "&", parameter.key, '=', parameter.value); } url.setQuery(queryWithoutRemovalKeys); } } // namespace WTF