# Copyright (C) 2013 Samsung Electronics. All rights reserved.
#
# Based on code from Chromium, copyright as follows:
#
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#    * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
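
# Parses the XML leak reports produced by Valgrind's memcheck tool and folds
# the errors found across all reports into a set of unique, suppressible
# errors. (The reports are presumably produced by runs along the lines of
# "valgrind --leak-check=yes --xml=yes --xml-file=drt-%p-leaks.xml"; the exact
# flags are an assumption, only the drt-*-leaks.xml naming is used below.)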
from collections import defaultdict
import hashlib
import logging
import re
from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError

from webkitcorepy import string_utils

_log = logging.getLogger(__name__)


def get_text_of(top_node, name):
    """ Returns all text in all DOM nodes with a certain |name| that are children of |top_node|. """

    text = ""
    for nodes_named in top_node.getElementsByTagName(name):
        text += "".join([node.data for node in nodes_named.childNodes
                         if node.nodeType == node.TEXT_NODE])
    return text


def get_CDATA_of(top_node, name):
    """ Returns all CDATA in all DOM nodes with a certain |name| that are children of |top_node|. """

    text = ""
    for nodes_named in top_node.getElementsByTagName(name):
        text += "".join([node.data for node in nodes_named.childNodes
                         if node.nodeType == node.CDATA_SECTION_NODE])
    if text == "":
        return None
    return text


# Constants that give real names to the abbreviations in valgrind XML output.
INSTRUCTION_POINTER = "ip"
OBJECT_FILE = "obj"
FUNCTION_NAME = "fn"
SRC_FILE_DIR = "dir"
SRC_FILE_NAME = "file"
SRC_LINE = "line"


def gather_frames(node, source_dir):
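    """Returns a list of dicts, one per <frame> element under |node|, mapping
    the constants above to that frame's text. (|source_dir| appears unused
    here; presumably it is kept for interface compatibility.)"""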
    frame_dict = lambda frame: {
        INSTRUCTION_POINTER: get_text_of(frame, INSTRUCTION_POINTER),
        OBJECT_FILE: get_text_of(frame, OBJECT_FILE),
        FUNCTION_NAME: get_text_of(frame, FUNCTION_NAME),
        SRC_FILE_DIR: get_text_of(frame, SRC_FILE_DIR),
        SRC_FILE_NAME: get_text_of(frame, SRC_FILE_NAME),
        SRC_LINE: get_text_of(frame, SRC_LINE)}

    return [frame_dict(frame) for frame in node.getElementsByTagName("frame")]


class ValgrindError:
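    """One parsed <error> element from Valgrind's XML output: the error kind,
    its backtraces, and the suppression Valgrind suggests for it."""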

    def __init__(self, executive, source_dir, error_node):
        self._executive = executive
        self._kind = get_text_of(error_node, "kind")
        self._backtraces = []
        self._suppression = None
        self._additional = []

        # Iterate through the nodes, parsing <what|auxwhat><stack> pairs.
        description = None
        for node in error_node.childNodes:
            if node.localName == "what" or node.localName == "auxwhat":
                description = "".join([n.data for n in node.childNodes
                                       if n.nodeType == n.TEXT_NODE])
            elif node.localName == "xwhat":
                description = get_text_of(node, "text")
            elif node.localName == "stack":
                assert description
                self._backtraces.append([description, gather_frames(node, source_dir)])
                description = None
            elif node.localName == "origin":
                description = get_text_of(node, "what")
                stack = node.getElementsByTagName("stack")[0]
                frames = gather_frames(stack, source_dir)
                self._backtraces.append([description, frames])
                description = None
            elif description and node.localName is not None:
                # The latest description has no stack, e.g. "Address 0x28 is unknown".
                self._additional.append(description)
                description = None

            if node.localName == "suppression":
                self._suppression = get_CDATA_of(node, "rawtext")

    def __str__(self):
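        """Formats the error as its kind, each backtrace with demangled
        frames, and a suppression block suitable for a suppressions file."""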
        output = self._kind + "\n"
        for backtrace in self._backtraces:
            output += backtrace[0] + "\n"

            buf = ""
            for frame in backtrace[1]:
                buf += (frame[FUNCTION_NAME] or frame[INSTRUCTION_POINTER]) + "\n"
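
            # Demangle each collected symbol name by piping it through c++filt
            # (-n: do not strip a leading underscore); frames with no function
            # name fell back to their instruction pointer above.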
            mangled_names = buf.encode('latin-1').split(b"\n")
            demangled_names = [self._executive.run_command(['c++filt', '-n', name]) for name in mangled_names if name]

            for i, frame in enumerate(backtrace[1]):
                output += (" " + demangled_names[i])

                if frame[SRC_FILE_DIR] != "":
                    output += (" (" + frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME] +
                               ":" + frame[SRC_LINE] + ")")
                else:
                    output += " (" + frame[OBJECT_FILE] + ")"
                output += "\n"

        for additional in self._additional:
            output += additional + "\n"

        assert self._suppression is not None, "Your Valgrind doesn't generate " \
                                              "suppressions - is it too old?"

        output += "Suppression (error hash=#%016X#):\n" % self.error_hash()

        # Widen the suppressions slightly.
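        # _Znwj/_Znwm are the Itanium-ABI manglings of operator new taking an
        # unsigned int/unsigned long, and _Znaj/_Znam the operator new[]
        # forms, so the wildcard makes each suppression word-size independent.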
        supp = self._suppression
        supp = supp.replace("fun:_Znwj", "fun:_Znw*")
        supp = supp.replace("fun:_Znwm", "fun:_Znw*")
        supp = supp.replace("fun:_Znaj", "fun:_Zna*")
        supp = supp.replace("fun:_Znam", "fun:_Zna*")

        # Split into lines so we can enforce length limits.
        supplines = supp.split("\n")
        supp = None  # to avoid re-use

        # Truncate at line 26 (VG_MAX_SUPP_CALLERS plus 2 for name and type)
        # (https://bugs.kde.org/show_bug.cgi?id=199468 proposes raising
        # VG_MAX_SUPP_CALLERS, but we're probably fine with it as is.)
        newlen = min(26, len(supplines))

        if len(supplines) > newlen:
            supplines = supplines[0:newlen]
            supplines.append("}")

        for i in range(len(supplines)):
            # Replace the always-changing anonymous namespace prefix with "*".
            m = re.match(r"( +fun:)_ZN.*_GLOBAL__N_.*\.cc_" +
                         "[0-9a-fA-F]{8}_[0-9a-fA-F]{8}(.*)",
                         supplines[i])
            if m:
                supplines[i] = "*".join(m.groups())

        return output + "\n".join(supplines) + "\n"

    def unique_string(self):
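        """Returns a string identifying this error across reports: the kind
        followed by every frame's function and source location."""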
        rep = self._kind + " "
        for backtrace in self._backtraces:
            for frame in backtrace[1]:
                rep += frame[FUNCTION_NAME]

                if frame[SRC_FILE_DIR] != "":
                    rep += frame[SRC_FILE_DIR] + "/" + frame[SRC_FILE_NAME]
                else:
                    rep += frame[OBJECT_FILE]
        return rep

    def error_hash(self):
        # This is a device-independent hash identifying the suppression.
        # By printing out this hash we can find duplicate reports between
        # tests and different shards running on multiple buildbots.
        return int(hashlib.md5(string_utils.encode(self.unique_string())).hexdigest()[:16], 16)

    def __hash__(self):
        return hash(self.unique_string())

    def __eq__(self, rhs):
        # |rhs| may be another ValgrindError or a plain string (both are kept
        # in the same sets below), so compare via unique_string() either way.
        if isinstance(rhs, ValgrindError):
            rhs = rhs.unique_string()
        return self.unique_string() == rhs


class LeakDetectorValgrind(object):
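    """Collects the Valgrind XML reports from a results directory, parses
    them, and logs each unique leak (and the suppressions used) once."""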

    def __init__(self, executive, filesystem, source_dir):
        self._executive = executive
        self._filesystem = filesystem
        self._source_dir = source_dir

        # Contains the set of unique errors.
        self._errors = set()
        # Contains all suppressions used.
        self._suppressions = defaultdict(int)

    def _parse_leaks_output(self, leaks_output):
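        """Parses one XML report. Returns the set of errors found in it,
        where errors already reported by an earlier report are replaced by a
        string referencing their hash; returns None if the XML is unparsable."""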
        try:
            parsed_string = parseString(leaks_output)
        except ExpatError as e:
            _log.error("could not parse %s: %s" % (string_utils.decode(leaks_output, target_type=str), e))
            return

        cur_report_errors = set()

        commandline = None
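        # Pull the "Command:" line out of the report preamble; it is captured
        # for debugging, though nothing below consumes it.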
        preamble = parsed_string.getElementsByTagName("preamble")[0]
        for node in preamble.getElementsByTagName("line"):
            if node.localName == "line":
                for x in node.childNodes:
                    if x.nodeType == node.TEXT_NODE and "Command" in x.data:
                        commandline = x.data
                        break

        raw_errors = parsed_string.getElementsByTagName("error")
        for raw_error in raw_errors:
            # Ignore "possible" leaks and InvalidRead/Write by default.
            kind = get_text_of(raw_error, "kind")
            if kind not in ("Leak_PossiblyLost", "Leak_StillReachable",
                            "InvalidWrite", "InvalidRead"):
                error = ValgrindError(self._executive, self._source_dir, raw_error)
                if error not in cur_report_errors:
                    # We haven't seen this error in the current report yet...
                    if error in self._errors:
                        # ... but we saw it in earlier reports, e.g. a previous UI test.
                        cur_report_errors.add("This error was already printed in "
                                              "some other test, see 'hash=#%016X#'" %
                                              error.error_hash())
                    else:
                        # ... and we haven't seen it in other tests either.
                        self._errors.add(error)
                        cur_report_errors.add(error)

        suppcountlist = parsed_string.getElementsByTagName("suppcounts")
        if len(suppcountlist) > 0:
            suppcountlist = suppcountlist[0]
            for node in suppcountlist.getElementsByTagName("pair"):
                count = get_text_of(node, "count")
                name = get_text_of(node, "name")
                self._suppressions[name] += int(count)

        return cur_report_errors

    def leaks_files_in_results_directory(self):
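        """Returns the paths of all drt-*-leaks.xml reports left in the
        results directory."""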
        return self._filesystem.glob(self._filesystem.join(self._source_dir, "drt-*-leaks.xml"))

    def clean_leaks_files_from_results_directory(self):
        # Remove old Valgrind xml files before starting this run.
        leaks_files = self.leaks_files_in_results_directory()
        for f in leaks_files:
            self._filesystem.remove(f)

    def parse_and_print_leaks_detail(self, leaks_files):
        for f in leaks_files:
            leaks_output = self._filesystem.read_binary_file(f)
            detected_leaks = self._parse_leaks_output(leaks_output)
_log.info("-----------------------------------------------------")
|
|
_log.info("Suppressions used:")
|
|
_log.info(" count name")
|
|
for (name, count) in sorted(self._suppressions.items(), key=lambda pair: (pair[1], pair[0])):
|
|
_log.info("%7d %s" % (count, name))
|
|
_log.info("-----------------------------------------------------")
|
|
|
|

        if self._errors:
            _log.info("Valgrind detected %s leaks:" % len(self._errors))
            # Force the same order in Python 2 and Python 3.
            for leak in sorted(self._errors, key=lambda error: error.unique_string()):
                _log.info(leak)