212 lines
9.9 KiB
Python
212 lines
9.9 KiB
Python
# Copyright (c) 2011, Google Inc. All rights reserved.
|
|
# Copyright (c) 2015, 2021 Apple Inc. All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met:
|
|
#
|
|
# * Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# * Redistributions in binary form must reproduce the above
|
|
# copyright notice, this list of conditions and the following disclaimer
|
|
# in the documentation and/or other materials provided with the
|
|
# distribution.
|
|
# * Neither the name of Google Inc. nor the names of its
|
|
# contributors may be used to endorse or promote products derived from
|
|
# this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
import datetime
|
|
import json
|
|
import logging
|
|
import re
|
|
|
|
from webkitcorepy import string_utils
|
|
|
|
|
|
# Module-level logger, named after this module, used to report unreadable or empty crash logs.
_log = logging.getLogger(__name__)
|
|
|
|
|
|
class CrashLogs(object):
    """Find and read crash logs stored under a crash log directory.

    All file access goes through the ``host`` object handed to the constructor.
    This class uses ``host.platform`` (``is_mac()``, ``is_ios()``, ``is_win()``),
    ``host.filesystem`` (``files_under()``, ``mtime()``, ``join()``,
    ``read_binary_file()``) and ``host.symbolicate_crash_log_if_needed()``.
    """

    # Matches a string like ' Global D1 PID: [14516]'
    GLOBAL_PID_REGEX = re.compile(r'\s+Global\b.+\bPID:\s+\[(?P<pid>\d+)\]')
    # Matches an 'Exit process <digits>:<pid>, code' line; callers parse <pid> as hexadecimal.
    EXIT_PROCESS_PID_REGEX = re.compile(r'Exit process \d+:(?P<pid>\w+), code')
    # Matches a 'Process:  <name> [<pid>]' line of a Darwin crash report.
    DARWIN_PROCESS_REGEX = re.compile(r'^Process:\s+(?P<process_name>.*) \[(?P<pid>\d+)\]$')
    # Captures the rest of a 'Date/Time:' line (up to, not including, the newline).
    _DATE_TIME_REGEX = re.compile(r'Date/Time:\s+(.+?)\n')

    def __init__(self, host, crash_log_directory, crash_logs_to_skip=None):
        """Remember where to look for crash logs and which ones to ignore.

        Args:
            host: object providing ``platform``, ``filesystem`` and
                ``symbolicate_crash_log_if_needed``.
            crash_log_directory: directory that is searched for crash logs.
            crash_logs_to_skip: optional collection of full log paths that must
                never be reported. Defaults to an empty list.
        """
        self._host = host
        self._crash_log_directory = crash_log_directory
        # Build the default per instance: a literal ``[]`` default would be a
        # single list shared (and mutable) across every CrashLogs object.
        self._crash_logs_to_skip = [] if crash_logs_to_skip is None else crash_logs_to_skip

    def find_newest_log(self, process_name, pid=None, include_errors=False, newer_than=None):
        """Return the contents of the newest crash log for a process.

        Args:
            process_name: name of the crashed process (used on Darwin only).
            pid: process id to match. On Darwin ``None`` matches any pid.
            include_errors: when true, read failures are prepended to the
                returned log, or returned alone if no log matched.
            newer_than: if truthy, only files whose mtime is greater than this
                value are considered.

        Returns:
            The log text, an error string (only with ``include_errors``), or
            ``None`` when nothing matched or the platform is unsupported.
        """
        platform = self._host.platform
        if platform.is_mac() or platform.is_ios():
            return self._find_newest_log_darwin(process_name, pid, include_errors, newer_than)
        if platform.is_win():
            return self._find_newest_log_win(pid, include_errors, newer_than)
        return None

    def find_all_logs(self, include_errors=False, newer_than=None):
        """Return every crash log found on Darwin platforms.

        Returns:
            A dict mapping a unique crash name to the log text, an error string
            when ``include_errors`` is set and only errors were collected, or
            ``None`` on platforms other than macOS/iOS.
        """
        platform = self._host.platform
        if platform.is_mac() or platform.is_ios():
            return self._find_all_logs_darwin(include_errors, newer_than)
        return None

    def _is_skipped(self, fs, dirpath, basename):
        """Return True if the file is listed in the skip list given to the constructor."""
        return bool(self._crash_logs_to_skip) and fs.join(dirpath, basename) in self._crash_logs_to_skip

    def _is_recent_enough(self, path, newer_than):
        """Return True if no cutoff was given or the file's mtime is past the cutoff.

        May raise IOError/OSError from the filesystem; callers handle those.
        """
        return not newer_than or self._host.filesystem.mtime(path) > newer_than

    def _paths_newest_first(self, file_filter):
        """Return the accepted log paths in descending sorted order.

        The callers walk this list front to back and treat the first match as
        the newest log.
        """
        logs = self._host.filesystem.files_under(self._crash_log_directory, file_filter=file_filter)
        return sorted(logs, reverse=True)

    def _parse_darwin_crash_log(self, path):
        """Extract the process name and pid from a Darwin crash log.

        Two layouts are understood: a two-part JSON report (a JSON header line
        followed by a JSON body carrying ``procName`` and ``pid``) and the
        plain-text report with a ``Process: <name> [<pid>]`` line.

        Returns:
            A ``(name, pid, contents)`` tuple. ``contents`` is ``None`` when the
            host returned no data; ``name`` and ``pid`` are ``None`` when they
            could not be determined. Names from plain-text reports that contain
            a 'Sandbox Violation:' line before the 'Process:' line are prefixed
            with 'Sandbox-'.
        """
        contents = self._host.symbolicate_crash_log_if_needed(path)
        if not contents:
            return (None, None, None)

        lines = contents.splitlines()
        if len(lines) >= 2 and lines[0].startswith('{') and lines[1].startswith('{'):
            try:
                # The header is parsed only to validate that it is JSON.
                json.loads(lines[0])
                decoded = json.loads('\n'.join(lines[1:]))
                name = decoded.get('procName')
                pid = decoded.get('pid')
                if name and pid:
                    return (name, pid, contents)
            except ValueError:
                # Not valid JSON after all; fall back to the plain-text scan.
                pass

        is_sandbox_violation = False
        for line in lines:
            if line.startswith('Sandbox Violation:'):
                is_sandbox_violation = True
            match = CrashLogs.DARWIN_PROCESS_REGEX.match(line)
            if match:
                prefix = 'Sandbox-' if is_sandbox_violation else ''
                return (prefix + match.group('process_name'), int(match.group('pid')), contents)
        return (None, None, contents)

    def _find_newest_log_darwin(self, process_name, pid, include_errors, newer_than):
        """Return the newest Darwin crash log for ``process_name`` (and ``pid``, if given).

        Returns the log text (prefixed by any collected read errors), the
        collected errors alone when ``include_errors`` is set and nothing
        matched, or ``None``.
        """
        def is_crash_log(fs, dirpath, basename):
            if self._is_skipped(fs, dirpath, basename):
                return False
            is_legacy_crash = basename.startswith(process_name + '_') and basename.endswith('.crash')
            is_ips_report = process_name in basename and basename.endswith('.ips')
            return is_legacy_crash or is_ips_report

        errors = ''
        for path in self._paths_newest_first(is_crash_log):
            try:
                if not self._is_recent_enough(path, newer_than):
                    continue
                parsed_name, parsed_pid, log_contents = self._parse_darwin_crash_log(path)
                if parsed_name == process_name and (pid is None or parsed_pid == pid):
                    return errors + log_contents
            except (IOError, OSError) as e:
                if include_errors:
                    errors += "ERROR: Failed to read '%s': %s\n" % (path, str(e))

        if include_errors and errors:
            return errors
        return None

    def _find_newest_log_win(self, pid, include_errors, newer_than):
        """Return the newest Windows 'CrashLog*' file that mentions ``pid``.

        A log matches when its 'Global ... PID: [n]' entry equals ``pid`` or,
        failing that, when its 'Exit process' entry (hexadecimal) equals ``pid``.

        Returns the log text (prefixed by any collected errors), the collected
        errors alone when ``include_errors`` is set and nothing matched, or
        ``None``.
        """
        def is_crash_log(fs, dirpath, basename):
            if self._is_skipped(fs, dirpath, basename):
                return False
            return basename.startswith("CrashLog")

        errors = u''
        for path in self._paths_newest_first(is_crash_log):
            try:
                if not self._is_recent_enough(path, newer_than):
                    continue
                log_file = string_utils.decode(self._host.filesystem.read_binary_file(path), encoding='ascii', errors='ignore')
                match = self.GLOBAL_PID_REGEX.search(log_file)
                if match and int(match.group('pid')) == pid:
                    return errors + log_file
                match = self.EXIT_PROCESS_PID_REGEX.search(log_file)
                if match is None:
                    continue
                # Note: This output comes from a program that shows PID in hex:
                try:
                    exit_pid = int(match.group('pid'), 16)
                except ValueError:
                    # The pattern captures any word characters; a token that is
                    # not hexadecimal cannot identify our process, so skip the
                    # log instead of aborting the whole search.
                    continue
                if exit_pid == pid:
                    return errors + log_file
            except (IOError, OSError) as e:
                if include_errors:
                    errors += u"ERROR: Failed to read '%s': %s\n" % (path, str(e))
            except UnicodeDecodeError as e:
                if include_errors:
                    errors += u"ERROR: Failed to decode '%s' as ascii: %s\n" % (path, str(e))

        if include_errors and errors:
            return errors
        return None

    def _find_all_logs_darwin(self, include_errors, newer_than):
        """Collect every '.crash' / '.ips' log, keyed by a unique crash name.

        Logs are filtered first by file mtime and then, when the log carries a
        parseable 'Date/Time:' line, by that timestamp as well.

        Returns:
            A dict of ``unique_name -> log text``. When ``include_errors`` is
            set, errors were collected and no log was found, the error string
            is returned instead.
        """
        def is_crash_log(fs, dirpath, basename):
            if self._is_skipped(fs, dirpath, basename):
                return False
            return basename.endswith('.crash') or basename.endswith('.ips')

        errors = ''
        crash_logs = {}
        for path in self._paths_newest_first(is_crash_log):
            try:
                if not self._is_recent_enough(path, newer_than):
                    continue

                parsed_name, parsed_pid, log_contents = self._parse_darwin_crash_log(path)
                if not log_contents:
                    _log.warning('No data in crash log at %s', path)
                    continue

                # Verify timestamp from log contents
                crash_time = self.get_timestamp_from_log(log_contents)
                if crash_time is not None and newer_than is not None:
                    start_time = datetime.datetime.fromtimestamp(float(newer_than))
                    if crash_time < start_time:
                        continue

                result_name = "Unknown"
                if parsed_name:
                    result_name = parsed_name + "-" + str(parsed_pid)

                # Processes can remain running after Sandbox violations, which generate crash logs.
                # This means that we can have multiple crash logs attributed to the same process.
                # The unique_name must be named in the format PROCESS_NAME-PID-# or Sandbox-PROCESS_NAME-PID-#,
                # where '-#' is optional. This is because of how DarwinPort._merge_crash_logs parses the crash name.
                count = 1
                unique_name = result_name
                while unique_name in crash_logs:
                    unique_name = result_name + '-' + str(count)
                    count += 1
                crash_logs[unique_name] = errors + log_contents
            except (IOError, OSError) as e:
                if include_errors:
                    errors += "ERROR: Failed to read '%s': %s\n" % (path, str(e))

        if include_errors and errors and not crash_logs:
            return errors
        return crash_logs

    def get_timestamp_from_log(self, log_contents):
        """Parse the 'Date/Time:' line of a crash log.

        Only the first two space-separated fields of the line (date and time
        with fractional seconds) are parsed; anything after them, such as a
        UTC offset, is discarded, so the result is a naive datetime.

        Returns:
            A ``datetime.datetime``, or ``None`` when the line is missing or
            does not match '%Y-%m-%d %H:%M:%S.%f'.
        """
        date_match = self._DATE_TIME_REGEX.search(log_contents)
        if not date_match:
            return None
        crash_time_str = ' '.join(date_match.group(1).split(" ")[0:2])
        try:
            return datetime.datetime.strptime(crash_time_str, '%Y-%m-%d %H:%M:%S.%f')
        except ValueError:
            return None
|