haikuwebkit/Tools/Scripts/webkitpy/benchmark_runner/benchmark_results.py

# Copyright (C) 2015 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import math
import re
import sys

from webkitpy.common.iteration_compatibility import iteritems


class BenchmarkResults(object):
    """Validate, aggregate and pretty-print a tree of benchmark results.

    The constructor takes a dictionary mapping test names to test
    dictionaries.  Each test dictionary has a 'metrics' entry, a 'tests'
    entry (nested subtests of the same shape), or both.  Each metric is
    either:

    * a dictionary mapping a configuration name (e.g. "current") to a list of
      numbers, or to a list of lists of numbers, or
    * a list of aggregator names (keys of ``aggregators``), in which case the
      value of the metric is computed from the subtests.

    Malformed input is reported by raising TypeError from the constructor.
    """

    # Functions folding the subtest values of a single iteration into one value.
    aggregators = {
        'Total': (lambda values: sum(values)),
        # True division on purpose: floor division would silently truncate the
        # mean of integer samples (and so would "/" on Python 2).
        'Arithmetic': (lambda values: float(sum(values)) / len(values)),
        'Geometric': (lambda values: math.exp(sum(map(math.log, values)) / len(values))),
    }
    # Unit used to display a metric, keyed by the trailing word of its name.
    metric_to_unit = {
        'FrameRate': 'fps',
        'Runs': '/s',
        'Time': 'ms',
        'Duration': 'ms',
        'Malloc': 'B',
        'Heap': 'B',
        'Allocations': 'B',
        'Score': 'pt',
        'Power': 'W',
    }
    # Index 3 (the empty string) is the unscaled unit; see _format_values.
    SI_prefixes = ['n', 'u', 'm', '', 'K', 'M', 'G', 'T', 'P', 'E']

    def __init__(self, results):
        """Lint `results` (raising TypeError if malformed) and aggregate them."""
        self._lint_results(results)
        self._results = self._aggregate_results(results)

    def format(self, scale_unit=True, show_iteration_values=False, max_depth=sys.maxsize):
        """Return the aggregated results as a multi-line, human readable string.

        scale_unit: when true, values are scaled and shown with an SI prefix.
        show_iteration_values: when true, every iteration value is appended.
        max_depth: number of levels of nested tests to include.
        """
        return self._format_tests(self._results, scale_unit, show_iteration_values, max_depth)

    @classmethod
    def _format_tests(cls, tests, scale_unit, show_iteration_values, max_depth, indent=''):
        """Format `tests` (aggregated results) and, recursively, their subtests.

        One line is emitted per (metric, aggregator) pair.  The test name is
        only printed on the first line of a test; following lines are padded
        so that the metric names line up.  Only the "current" configuration
        is printed.
        """
        config_name = 'current'
        chunks = []
        for test_name in sorted(tests.keys()):
            test = tests[test_name]
            metrics = test.get('metrics', {})
            padding = ' ' * len(test_name)
            is_first = True
            for metric_name in sorted(metrics.keys()):
                metric = metrics[metric_name]
                for aggregator_name in sorted(metric.keys()):
                    label = test_name if is_first else padding
                    is_first = False
                    line = indent + label + ':' + metric_name + ':'
                    # Metrics measured directly (not aggregated) are keyed by None.
                    if aggregator_name:
                        line += aggregator_name + ':'
                    line += ' ' + cls._format_values(metric_name, metric[aggregator_name][config_name], scale_unit, show_iteration_values) + '\n'
                    chunks.append(line)
            if 'tests' in test and max_depth > 1:
                chunks.append(cls._format_tests(test['tests'], scale_unit, show_iteration_values, max_depth - 1, indent=(indent + padding)))
        return ''.join(chunks)

    @classmethod
    def _relative_stdev(cls, sample_stdev, mean):
        """Return sample_stdev relative to |mean| without dividing by zero.

        A zero mean yields 0.0 when there is no deviation either, and
        infinity otherwise.
        """
        if mean:
            return sample_stdev / abs(mean)
        return float('inf') if sample_stdev else 0.0

    @classmethod
    def _format_values(cls, metric_name, values, scale_unit=True, show_iteration_values=False):
        """Format the mean and relative sample standard deviation of `values`.

        metric_name: used to pick the unit (see _unit_from_metric).
        values: non-empty sequence of numbers.
        scale_unit: when false the mean is printed with three decimals in the
            metric's own unit; when true milliseconds are converted to seconds
            and an SI prefix is chosen (base 1024 for bytes, 1000 otherwise),
            with the number of decimals derived from the relative deviation.
        show_iteration_values: append the individual values in brackets.

        Raises ZeroDivisionError if `values` is empty and KeyError if the
        metric name does not map to a known unit.
        """
        values = list(map(float, values))
        sample_count = len(values)
        total = sum(values)
        mean = total / sample_count
        square_sum = sum([x * x for x in values])

        # With sum and sum of squares, we can compute the sample standard deviation in O(1).
        # See https://rniwa.com/2012-11-10/sample-standard-deviation-in-terms-of-sum-and-square-sum-of-samples/
        if sample_count <= 1:
            sample_stdev = 0
        else:
            # Be careful about round-off error when sample_stdev is 0.
            sample_stdev = math.sqrt(max(0, square_sum / (sample_count - 1) - total * total / (sample_count - 1) / sample_count))

        unit = cls._unit_from_metric(metric_name)

        if not scale_unit:
            formatted_value = '{mean:.3f}{unit} stdev={delta:.1%}'.format(mean=mean, delta=cls._relative_stdev(sample_stdev, mean), unit=unit)
            if show_iteration_values:
                formatted_value += ' [' + ', '.join(['{value:.3f}'.format(value=value) for value in values]) + ']'
            return formatted_value

        if unit == 'ms':
            # Express times in seconds so that the SI prefix reads naturally.
            unit = 's'
            mean = mean / 1000
            values = [value / 1000 for value in values]
            sample_stdev /= 1000

        base = 1024 if unit == 'B' else 1000
        relative_stdev = cls._relative_stdev(sample_stdev, mean)

        # Show one more significant figure than the relative deviation
        # warrants; fall back to three when the deviation carries no
        # information (zero) or is unbounded (zero mean).
        if relative_stdev and not math.isinf(relative_stdev):
            value_sig_fig = 1 - math.floor(math.log10(relative_stdev))
        else:
            value_sig_fig = 3

        # The logarithm is only defined for positive numbers, so use the
        # magnitude of the mean and treat a zero mean as unscaled.
        SI_magnitude = math.floor(math.log(abs(mean), base)) if mean else 0
        # Clamp to the prefixes we know about: a negative list index would
        # otherwise wrap around and silently pick a wrong prefix, and a too
        # large one would raise IndexError.
        SI_magnitude = int(min(max(SI_magnitude, -3), len(cls.SI_prefixes) - 4))

        scaling_factor = math.pow(base, -SI_magnitude)
        scaled_mean = mean * scaling_factor
        SI_prefix = cls.SI_prefixes[SI_magnitude + 3]

        non_floating_digits = 1 + math.floor(math.log10(abs(scaled_mean))) if scaled_mean else 1
        floating_points_count = max(0, value_sig_fig - non_floating_digits)

        def format_scaled(value):
            return ('{value:.' + str(int(floating_points_count)) + 'f}').format(value=value)

        formatted_value = '{mean}{prefix}{unit} stdev={delta:.1%}'.format(mean=format_scaled(scaled_mean), delta=relative_stdev, prefix=SI_prefix, unit=unit)
        if show_iteration_values:
            formatted_value += ' [' + ', '.join([format_scaled(value * scaling_factor) for value in values]) + ']'
        return formatted_value

    @classmethod
    def _unit_from_metric(cls, metric_name):
        """Return the display unit for `metric_name`.

        The unit is looked up from the trailing capitalized word of the name
        (or a trailing "FrameRate"), so e.g. "JSHeap" uses the unit of "Heap".

        Raises KeyError when the name does not end with a known metric.
        """
        match = re.match(r'.*?([A-Z][a-z]+|FrameRate)$', metric_name)
        suffix = match.group(1) if match else None
        if suffix not in cls.metric_to_unit:
            raise KeyError('Unknown metric name: "%s"' % metric_name)
        return cls.metric_to_unit[suffix]

    @classmethod
    def _aggregate_results(cls, tests):
        """Return a dictionary mapping each test name to its aggregated results."""
        results = {}
        for test_name, test in iteritems(tests):
            results[test_name] = cls._aggregate_results_for_test(test)
        return results

    @classmethod
    def _aggregate_results_for_test(cls, test):
        """Aggregate one test; returns {'metrics': ..., 'tests': ...}.

        In the returned 'metrics', each metric maps an aggregator name (or
        None for values measured directly by this test) to a dictionary of
        configuration name -> flat list of values, one per iteration.
        """
        subtest_results = cls._aggregate_results(test['tests']) if 'tests' in test else {}
        results = {}
        for metric_name, metric in iteritems(test.get('metrics', {})):
            if not isinstance(metric, list):
                # Directly measured values: store them under the None aggregator.
                results[metric_name] = {None: {}}
                for config_name, values in iteritems(metric):
                    results[metric_name][None][config_name] = cls._flatten_list(values)
                continue

            # Filter duplicate aggregators that could have arisen from merging JSONs.
            aggregator_list = list(set(metric))
            results[metric_name] = {}
            for aggregator in aggregator_list:
                values_by_config_iteration = cls._subtest_values_by_config_iteration(subtest_results, metric_name, aggregator)
                for config_name, values_by_iteration in iteritems(values_by_config_iteration):
                    results[metric_name].setdefault(aggregator, {})
                    results[metric_name][aggregator][config_name] = [cls._aggregate_values(aggregator, values) for values in values_by_iteration]

        return {'metrics': results, 'tests': subtest_results}

    @classmethod
    def _flatten_list(cls, nested_list):
        """Return the items of arbitrarily nested lists as one flat list."""
        flattened_list = []
        for item in nested_list:
            if isinstance(item, list):
                flattened_list += cls._flatten_list(item)
            else:
                flattened_list.append(item)
        return flattened_list

    @classmethod
    def _subtest_values_by_config_iteration(cls, subtest_results, metric_name, aggregator):
        """Collect the subtest values of `metric_name`, grouped per iteration.

        Returns a dictionary mapping a configuration name to a list with one
        entry per iteration; each entry is the list of the values that the
        subtests reported for that iteration.

        For each subtest the values come from, in order of preference: the
        values it aggregated with the same `aggregator`, its directly
        measured values, or its only set of values if it has exactly one.
        """
        values_by_config_iteration = {}
        for subtest_name, subtest in iteritems(subtest_results):
            results_for_metric = subtest['metrics'].get(metric_name, {})
            if aggregator in results_for_metric:
                results_for_aggregator = results_for_metric[aggregator]
            elif None in results_for_metric:
                results_for_aggregator = results_for_metric[None]
            elif len(results_for_metric) == 1:
                results_for_aggregator = next(iter(results_for_metric.values()))
            else:
                results_for_aggregator = {}
            for config_name, values in iteritems(results_for_aggregator):
                values_by_config_iteration.setdefault(config_name, [[] for _ in values])
                for iteration, value in enumerate(values):
                    values_by_config_iteration[config_name][iteration].append(value)
        return values_by_config_iteration

    @classmethod
    def _aggregate_values(cls, aggregator, values):
        """Apply the aggregator named `aggregator` to `values`."""
        return cls.aggregators[aggregator](values)

    @classmethod
    def _lint_results(cls, tests):
        """Validate `tests`; returns True or raises TypeError."""
        cls._lint_subtest_results(tests, None, None)
        return True

    @classmethod
    def _lint_subtest_results(cls, subtests, parent_test, parent_aggregator_list):
        """Validate `subtests` recursively, raising TypeError on malformed data.

        parent_test / parent_aggregator_list describe the enclosing test and
        are None at the top level.  Returns the value shapes recorded for the
        sibling tests, keyed by metric name and then configuration name.
        """
        iteration_groups_by_config = {}
        for test_name, test in iteritems(subtests):
            aggregator_list = None

            if 'metrics' not in test and 'tests' not in test:
                raise TypeError('"%s" does not contain metrics or tests' % test_name)

            if 'metrics' in test:
                metrics = test['metrics']
                if not isinstance(metrics, dict):
                    raise TypeError('The metrics in "%s" is not a dictionary' % test_name)
                for metric_name, metric in iteritems(metrics):
                    if isinstance(metric, list):
                        # Filter duplicate aggregators that could have arisen from merging JSONs.
                        aggregator_list = list(set(metric))
                        cls._lint_aggregator_list(test_name, metric_name, aggregator_list, parent_test, parent_aggregator_list)
                    elif isinstance(metric, dict):
                        cls._lint_configuration(test_name, metric_name, metric, parent_test, parent_aggregator_list, iteration_groups_by_config)
                    else:
                        raise TypeError('"%s" metric of "%s" was not an aggregator list or a dictionary of configurations: %s' % (metric_name, test_name, str(metric)))

            if 'tests' in test:
                cls._lint_subtest_results(test['tests'], test_name, aggregator_list)
            elif aggregator_list:
                # There is nothing to aggregate without subtests.
                raise TypeError('"%s" requires aggregation but it has no subtests' % (test_name))
        return iteration_groups_by_config

    @classmethod
    def _lint_aggregator_list(cls, test_name, metric_name, aggregator_list, parent_test, parent_aggregator_list):
        """Validate the aggregator names of one metric, raising TypeError.

        The list must be non-empty, free of duplicates and made of known
        aggregator names.  When the parent test aggregates too, a test using
        more than one aggregator must provide each of the parent's.
        """
        if len(aggregator_list) != len(set(aggregator_list)):
            raise TypeError('"%s" metric of "%s" had invalid aggregator list: %s' % (metric_name, test_name, json.dumps(aggregator_list)))
        if not aggregator_list:
            raise TypeError('The aggregator list is empty in "%s" metric of "%s"' % (metric_name, test_name))
        for aggregator_name in aggregator_list:
            # A list of numbers here means raw values were given without a configuration name.
            if cls._is_numeric(aggregator_name):
                raise TypeError('"%s" metric of "%s" is not wrapped by a configuration; e.g. "current"' % (metric_name, test_name))
            if aggregator_name not in cls.aggregators:
                raise TypeError('"%s" metric of "%s" uses unknown aggregator: %s' % (metric_name, test_name, aggregator_name))
        if not parent_aggregator_list:
            return
        for parent_aggregator in parent_aggregator_list:
            if parent_aggregator not in aggregator_list and len(aggregator_list) > 1:
                raise TypeError('"%s" metric of "%s" has no value to aggregate as "%s" in a subtest "%s"' % (
                    metric_name, parent_test, parent_aggregator, test_name))

    @classmethod
    def _lint_configuration(cls, test_name, metric_name, configurations, parent_test, parent_aggregator_list, iteration_groups_by_config):
        """Validate the values of every configuration of one metric.

        Values must be a sequence of numbers or a sequence of lists of
        numbers (not a mix).  The shape of the values is recorded in
        `iteration_groups_by_config`; when the parent test aggregates, the
        shape must match the one recorded for the same metric and
        configuration.  Raises TypeError otherwise.
        """
        # FIXME: Check that config_name is always "current".
        for config_name, values in iteritems(configurations):
            # Reject scalars and mappings up front so that the error names the
            # offending metric instead of failing while iterating.
            if not isinstance(values, (list, tuple)):
                raise TypeError('"%s" metric of "%s" had malformed values: %s' % (metric_name, test_name, json.dumps(values)))

            nested_list_count = [isinstance(value, list) for value in values].count(True)
            if nested_list_count not in [0, len(values)]:
                raise TypeError('"%s" metric of "%s" had malformed values: %s' % (metric_name, test_name, json.dumps(values)))

            if nested_list_count:
                value_shape = []
                for value_group in values:
                    value_shape.append(len(value_group))
                    cls._lint_values(test_name, metric_name, value_group)
            else:
                value_shape = len(values)
                cls._lint_values(test_name, metric_name, values)

            iteration_groups_by_config.setdefault(metric_name, {}).setdefault(config_name, value_shape)
            if parent_aggregator_list and value_shape != iteration_groups_by_config[metric_name][config_name]:
                raise TypeError('"%s" metric of "%s" had a mismatching subtest values' % (metric_name, parent_test))

    @classmethod
    def _lint_values(cls, test_name, metric_name, values):
        """Raise TypeError unless every item of `values` is numeric."""
        if not all(cls._is_numeric(value) for value in values):
            raise TypeError('"%s" metric of "%s" contains non-numeric value: %s' % (metric_name, test_name, json.dumps(values)))

    @classmethod
    def _is_numeric(cls, value):
        """Return True if `value` is an int or a float, but not a bool."""
        # bool is a subclass of int; a JSON true/false is not a measurement.
        return isinstance(value, (int, float)) and not isinstance(value, bool)