#!/usr/bin/python

import argparse
import json
import os.path
import re
import subprocess
import sys
import time
import urllib2

from datetime import datetime
from abc import ABCMeta, abstractmethod
from xml.dom.minidom import parseString as parseXmlString
from util import load_server_config
from util import submit_commits
from util import text_content

# There are some buggy commit messages:
# Canonical link: https://commits.webkit.org/https://commits.webkit.org/232477@main
REVISION_IDENTIFIER_RE = re.compile(r'Canonical link: (https\://commits\.webkit\.org/)+(?P<revision_identifier>\d+@[\w\.\-]+)\n')


def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('--repository-config-json', required=True, help='The path to a JSON file that specifies the repositories to sync')
    parser.add_argument('--server-config-json', required=True, help='The path to a JSON file that specifies the perf dashboard')
    parser.add_argument('--seconds-to-sleep', type=float, default=900, help='The number of seconds to sleep between iterations')
    parser.add_argument('--max-fetch-count', type=int, default=10, help='The number of commits to fetch at once')
    parser.add_argument('--max-ancestor-fetch-count', type=int, default=100, help='The number of commits to fetch at once if some commits are missing previous commits')
    args = parser.parse_args()

    with open(args.repository_config_json) as repository_config_json:
        repositories = [load_repository(repository_info) for repository_info in json.load(repository_config_json)]

    while True:
        server_config = load_server_config(args.server_config_json)
        for repository in repositories:
            try:
                repository.fetch_commits_and_submit(server_config, args.max_fetch_count, args.max_ancestor_fetch_count)
            except Exception as error:
                print "Failed to fetch and sync:", error

        print "Sleeping for %d seconds..." % args.seconds_to_sleep
        time.sleep(args.seconds_to_sleep)


def load_repository(repository):
    if 'gitCheckout' in repository:
        return GitRepository(
            name=repository['name'], git_url=repository['url'], git_checkout=repository['gitCheckout'],
            git_branch=repository.get('branch'), report_revision_identifier_in_commit_msg=repository.get('reportRevisionIdentifier'),
            report_svn_revision=repository.get('reportSVNRevision'))
    return SVNRepository(name=repository['name'], svn_url=repository['url'], should_trust_certificate=repository.get('trustCertificate', False),
        use_server_auth=repository.get('useServerAuth', False), account_name_script_path=repository.get('accountNameFinderScript'))


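# Abstract base class for the repositories this script polls. Subclasses implement
# fetch_next_commit, fetch_commit, and format_revision.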
class Repository(object):
    __metaclass__ = ABCMeta

    _name_account_compound_regex = re.compile(r'^\s*(?P<name>(\".+\"|[^<]+?))\s*\<(?P<account>.+)\>\s*$')

    def __init__(self, name):
        self._name = name
        self._last_fetched = None

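    # Fetch up to max_fetch_count commits that follow the last reported revision and submit them
    # to the dashboard. When the server responds with FailedToFindPreviousCommit, the missing
    # ancestor is fetched and prepended, retrying up to max_ancestor_fetch_count times.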
    def fetch_commits_and_submit(self, server_config, max_fetch_count, max_ancestor_fetch_count):
        if not self._last_fetched:
            print "Determining the starting revision for %s" % self._name
            self._last_fetched = self.determine_last_reported_revision(server_config)

        pending_commits = []
        for unused in range(max_fetch_count):
            commit = self.fetch_next_commit(server_config, self._last_fetched)
            if not commit:
                break
            pending_commits += [commit]
            self._last_fetched = commit['revision']

        if not pending_commits:
            print "No new revision found for %s (last fetched: %s)" % (self._name, self.format_revision(self._last_fetched))
            return

        for unused in range(max_ancestor_fetch_count):
            revision_list = ', '.join([self.format_revision(commit['revision']) for commit in pending_commits])
            print "Submitting revisions %s for %s to %s" % (revision_list, self._name, server_config['server']['url'])

            result = submit_commits(pending_commits, server_config['server']['url'],
                server_config['worker']['name'], server_config['worker']['password'], ['OK', 'FailedToFindPreviousCommit'])

            if result.get('status') == 'OK':
                break

            if result.get('status') == 'FailedToFindPreviousCommit':
                previous_commit = self.fetch_commit(server_config, result['commit']['previousCommit'])
                if not previous_commit:
                    raise Exception('Could not find the previous commit %s of %s' % (result['commit']['previousCommit'], result['commit']['revision']))
                pending_commits = [previous_commit] + pending_commits

        if result.get('status') != 'OK':
            raise Exception(result)

        print "Successfully submitted."
        print

    @abstractmethod
    def fetch_next_commit(self, server_config, last_fetched):
        pass

    @abstractmethod
    def fetch_commit(self, server_config, last_fetched):
        pass

    @abstractmethod
    def format_revision(self, revision):
        pass

    def determine_last_reported_revision(self, server_config):
        last_reported_revision = self.fetch_revision_from_dashboard(server_config, 'last-reported')
        if last_reported_revision:
            return last_reported_revision

    def fetch_revision_from_dashboard(self, server_config, filter):
        result = urllib2.urlopen(server_config['server']['url'] + '/api/commits/' + self._name + '/' + filter).read()
        parsed_result = json.loads(result)
        if parsed_result['status'] != 'OK' and parsed_result['status'] != 'RepositoryNotFound':
            raise Exception(result)
        commits = parsed_result.get('commits')
        return commits[0]['revision'] if commits else None


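# Fetches commits one revision at a time with 'svn log --xml' and optionally resolves
# author names by running an external account-name-finder script.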
class SVNRepository(Repository):

    def __init__(self, name, svn_url, should_trust_certificate, use_server_auth, account_name_script_path):
        assert not account_name_script_path or isinstance(account_name_script_path, list)
        super(SVNRepository, self).__init__(name)
        self._svn_url = svn_url
        self._should_trust_certificate = should_trust_certificate
        self._use_server_auth = use_server_auth
        self._account_name_script_path = account_name_script_path

    def fetch_next_commit(self, server_config, last_fetched):
        if not last_fetched:
            # FIXME: This is problematic if the dashboard can get results for revisions older than the
            # oldest revision in the future because we never refetch older revisions.
            last_fetched = self.fetch_revision_from_dashboard(server_config, 'oldest')

        revision_to_fetch = int(last_fetched) + 1

        args = ['svn', 'log', '--revision', str(revision_to_fetch), '--xml', self._svn_url, '--non-interactive']
        if self._use_server_auth and 'auth' in server_config['server']:
            server_auth = server_config['server']['auth']
            args += ['--no-auth-cache', '--username', server_auth['username'], '--password', server_auth['password']]
        if self._should_trust_certificate:
            args += ['--trust-server-cert']

        try:
            output = subprocess.check_output(args, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as error:
            if (': No such revision ' + str(revision_to_fetch)) in error.output:
                return None
            raise error

        xml = parseXmlString(output)
        commit_time = text_content(xml.getElementsByTagName("date")[0])
        author_elements = xml.getElementsByTagName("author")
        author_account = text_content(author_elements[0]) if author_elements.length else None
        message = text_content(xml.getElementsByTagName("msg")[0])

        name = self._resolve_author_name(author_account) if author_account and self._account_name_script_path else None

        result = {
            'repository': self._name,
            'revision': revision_to_fetch,
            'time': commit_time,
            'message': message,
        }

        if author_account:
            result['author'] = {'account': author_account, 'name': name}

        return result

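    # SVN commits are submitted without a 'previousCommit' field, so there is no ancestor for the
    # dashboard to request for this repository; returning None keeps the base class behavior.
    def fetch_commit(self, server_config, revision_to_find):
        return None
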
    def _resolve_author_name(self, account):
        try:
            output = subprocess.check_output(self._account_name_script_path + [account])
        except subprocess.CalledProcessError:
            print 'Failed to resolve the name for account:', account
            return None

        match = Repository._name_account_compound_regex.match(output)
        if match:
            return match.group('name').strip('"')
        return output.strip()

    def format_revision(self, revision):
        return 'r' + str(revision)


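# Fetches commits from a local git checkout. Revisions are reported as git hashes, or as SVN
# revisions resolved through git-svn when reportSVNRevision is set; reportRevisionIdentifier
# additionally extracts the revision identifier embedded in each commit message.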
class GitRepository(Repository):

    def __init__(self, name, git_checkout, git_url, git_branch=None, report_revision_identifier_in_commit_msg=False, report_svn_revision=False):
        assert os.path.isdir(git_checkout)
        super(GitRepository, self).__init__(name)
        self._git_checkout = git_checkout
        self._git_url = git_url
        self._git_branch = git_branch
        self._tokenized_hashes = []
        self._report_revision_identifier_in_commit_msg = report_revision_identifier_in_commit_msg
        self._report_svn_revision = report_svn_revision

    def fetch_next_commit(self, server_config, last_fetched):
        if not last_fetched:
            self._fetch_all_hashes()
            tokens = self._tokenized_hashes[0]
        else:
            if self._report_svn_revision:
                last_fetched_git_hash = self._git_hash_from_svn_revision(last_fetched)
                if not last_fetched_git_hash:
                    self._fetch_remote()
                    last_fetched_git_hash = self._git_hash_from_svn_revision(last_fetched)
                    if not last_fetched_git_hash:
                        raise ValueError('Cannot find the git hash for the last fetched svn revision')
                last_fetched = last_fetched_git_hash
            tokens = self._find_next_hash(last_fetched)
            if not tokens:
                self._fetch_all_hashes()
                tokens = self._find_next_hash(last_fetched)
                if not tokens:
                    return None
        return self._revision_from_tokens(tokens)

    def fetch_commit(self, server_config, hash_to_find):
        assert self._tokenized_hashes
        for i, tokens in enumerate(self._tokenized_hashes):
            if tokens and tokens[0] == hash_to_find:
                return self._revision_from_tokens(tokens)
        return None

    def _svn_revision_from_git_hash(self, git_hash):
        return self._run_git_command(['svn', 'find-rev', git_hash]).strip()

    def _git_hash_from_svn_revision(self, revision):
        return self._run_git_command(['svn', 'find-rev', 'r{}'.format(revision)]).strip()

    def _revision_from_tokens(self, tokens):
        current_hash = tokens[0]
        commit_time = int(tokens[1])
        author_email = tokens[2]
        previous_hash = tokens[3] if len(tokens) >= 4 else None

        author_name = self._run_git_command(['log', current_hash, '-1', '--pretty=%cn'])
        message = self._run_git_command(['log', current_hash, '-1', '--pretty=%B'])

        revision_identifier = None
        if self._report_revision_identifier_in_commit_msg:
            revision_identifier_match = REVISION_IDENTIFIER_RE.search(message)
            if not revision_identifier_match:
                raise ValueError('Expected the commit message to include a revision identifier but could not find one; a history rewrite is needed to fix it')
            revision_identifier = revision_identifier_match.group('revision_identifier')

        current_revision = current_hash
        previous_revision = previous_hash
        if self._report_svn_revision:
            current_revision = self._svn_revision_from_git_hash(current_hash)
            if not current_revision:
                raise ValueError('Cannot find the SVN revision for {}'.format(current_hash))
            if previous_hash:
                previous_revision = self._svn_revision_from_git_hash(previous_hash)
                if not previous_revision:
                    raise ValueError('Cannot find the SVN revision for {}'.format(previous_hash))

        return {
            'repository': self._name,
            'revision': current_revision,
            'revisionIdentifier': revision_identifier,
            'previousCommit': previous_revision,
            'time': datetime.utcfromtimestamp(commit_time).strftime(r'%Y-%m-%dT%H:%M:%S.%f'),
            'author': {'account': author_email, 'name': author_name},
            'message': message,
        }

    def _find_next_hash(self, hash_to_find):
        for i, tokens in enumerate(self._tokenized_hashes):
            if tokens and tokens[0] == hash_to_find:
                return self._tokenized_hashes[i + 1] if i + 1 < len(self._tokenized_hashes) else None
        return None

    def _fetch_remote(self):
        if self._report_svn_revision:
            self._run_git_command(['pull'])
            subprocess.check_call(['rm', '-rf', os.path.join(self._git_checkout, '.git/svn')])
            self._run_git_command(['svn', 'fetch'])
        else:
            self._run_git_command(['pull', self._git_url])

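    # Each entry in _tokenized_hashes is [hash, commit time, committer email, parent hashes...],
    # in chronological order, produced by 'git log --pretty=%H %ct %ce %P'.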
    def _fetch_all_hashes(self):
        self._fetch_remote()
        scope = self._git_branch or '--all'
        lines = self._run_git_command(['log', scope, '--date-order', '--reverse', '--pretty=%H %ct %ce %P']).split('\n')
        self._tokenized_hashes = [line.split() for line in lines]

    def _run_git_command(self, args):
        return subprocess.check_output(['git', '-C', self._git_checkout] + args, stderr=subprocess.STDOUT)

    def format_revision(self, revision):
        return str(revision)[0:8]


if __name__ == "__main__":
    main(sys.argv)