#!/usr/bin/python
import argparse
import json
import os . path
import re
import subprocess
import sys
import time
import urllib2
from datetime import datetime
from abc import ABCMeta , abstractmethod
from xml . dom . minidom import parseString as parseXmlString
from util import load_server_config
from util import submit_commits
from util import text_content
# Some commit messages are buggy and repeat the URL prefix, e.g.:
# Canonical link: https://commits.webkit.org/https://commits.webkit.org/232477@main
# The '+' after the prefix group tolerates those repetitions.
REVISION_IDENTIFIER_RE = re.compile(r'Canonical link: (https\://commits\.webkit\.org/)+(?P<revision_identifier>\d+@[\w\.\-]+)\n')
def main(argv):
    """Poll every configured repository forever and report new commits to the dashboard."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--repository-config-json', required=True, help='The path to a JSON file that specifies subversion syncing options')
    arg_parser.add_argument('--server-config-json', required=True, help='The path to a JSON file that specifies the perf dashboard')
    arg_parser.add_argument('--seconds-to-sleep', type=float, default=900, help='The seconds to sleep between iterations')
    arg_parser.add_argument('--max-fetch-count', type=int, default=10, help='The number of commits to fetch at once')
    arg_parser.add_argument('--max-ancestor-fetch-count', type=int, default=100, help='The number of commits to fetch at once if some commits are missing previous commits')
    options = arg_parser.parse_args()

    with open(options.repository_config_json) as config_file:
        repository_list = [load_repository(info) for info in json.load(config_file)]

    # Loop forever; the server config is reloaded each iteration so credential
    # changes take effect without a restart.
    while True:
        server_config = load_server_config(options.server_config_json)
        for repository in repository_list:
            try:
                repository.fetch_commits_and_submit(server_config, options.max_fetch_count, options.max_ancestor_fetch_count)
            except Exception as error:
                print('Failed to fetch and sync: %s' % error)

        print('Sleeping for %d seconds...' % options.seconds_to_sleep)
        time.sleep(options.seconds_to_sleep)
def load_repository(repository):
    """Instantiate the right Repository subclass from one entry of the
    repository-config JSON: git when 'gitCheckout' is present, SVN otherwise."""
    if 'gitCheckout' not in repository:
        return SVNRepository(name=repository['name'], svn_url=repository['url'], should_trust_certificate=repository.get('trustCertificate', False),
            use_server_auth=repository.get('useServerAuth', False), account_name_script_path=repository.get('accountNameFinderScript'))
    return GitRepository(
        name=repository['name'], git_url=repository['url'], git_checkout=repository['gitCheckout'],
        git_branch=repository.get('branch'), report_revision_identifier_in_commit_msg=repository.get('reportRevisionIdentifier'),
        report_svn_revison=repository.get('reportSVNRevision'))
class Repository(object):
    """Abstract base class for a repository that is polled for new commits and
    submits them to the perf dashboard via util.submit_commits."""

    # Bug fix: this attribute was misspelled '___metaclass___' (three underscores
    # on each side), so ABCMeta was never actually applied and the abstract
    # methods below were not enforced on subclasses.
    __metaclass__ = ABCMeta

    # Parses 'Name <account>' author strings; the name part may be double-quoted.
    _name_account_compound_regex = re.compile(r'^\s*(?P<name>(\".+\"|[^<]+?))\s*\<(?P<account>.+)\>\s*$')

    def __init__(self, name):
        self._name = name
        self._last_fetched = None  # Revision of the newest commit reported so far, or None.

    def fetch_commits_and_submit(self, server_config, max_fetch_count, max_ancestor_fetch_count):
        """Fetch up to max_fetch_count new commits and submit them, pulling in up to
        max_ancestor_fetch_count missing ancestors when the server reports a gap.

        Raises an Exception when submission fails for any reason other than a
        missing previous commit that could be fetched."""
        if not self._last_fetched:
            print('Determining the starting revision for %s' % self._name)
            self._last_fetched = self.determine_last_reported_revision(server_config)

        pending_commits = []
        for unused in range(max_fetch_count):
            commit = self.fetch_next_commit(server_config, self._last_fetched)
            if not commit:
                break
            pending_commits += [commit]
            self._last_fetched = commit['revision']

        if not pending_commits:
            print('No new revision found for %s (last fetched: %s)' % (self._name, self.format_revision(self._last_fetched)))
            return

        for unused in range(max_ancestor_fetch_count):
            revision_list = ', '.join([self.format_revision(commit['revision']) for commit in pending_commits])

            print('Submitting revisions %s for %s to %s' % (revision_list, self._name, server_config['server']['url']))

            result = submit_commits(pending_commits, server_config['server']['url'],
                server_config['worker']['name'], server_config['worker']['password'], ['OK', 'FailedToFindPreviousCommit'])

            if result.get('status') == 'OK':
                break

            if result.get('status') == 'FailedToFindPreviousCommit':
                # Prepend the missing ancestor and retry the whole submission.
                previous_commit = self.fetch_commit(server_config, result['commit']['previousCommit'])
                if not previous_commit:
                    raise Exception('Could not find the previous commit %s of %s' % (result['commit']['previousCommit'], result['commit']['revision']))
                pending_commits = [previous_commit] + pending_commits

        if result.get('status') != 'OK':
            raise Exception(result)

        print('Successfully submitted.')
        print('')

    @abstractmethod
    def fetch_next_commit(self, server_config, last_fetched):
        """Return the commit dictionary that follows last_fetched, or None when
        there is no newer commit."""
        pass

    @abstractmethod
    def fetch_commit(self, server_config, last_fetched):
        """Return the commit dictionary for a specific revision, or None."""
        pass

    @abstractmethod
    def format_revision(self, revision):
        """Return a human-readable form of a revision for log messages."""
        pass

    def determine_last_reported_revision(self, server_config):
        """Return the last revision the dashboard knows about, or None when the
        dashboard has no commits for this repository."""
        last_reported_revision = self.fetch_revision_from_dasbhoard(server_config, 'last-reported')
        if last_reported_revision:
            return last_reported_revision

    def fetch_revision_from_dasbhoard(self, server_config, filter):
        # NOTE: the method name preserves a historical typo ('dasbhoard');
        # subclasses call it by this name.
        result = urllib2.urlopen(server_config['server']['url'] + '/api/commits/' + self._name + '/' + filter).read()
        parsed_result = json.loads(result)
        if parsed_result['status'] != 'OK' and parsed_result['status'] != 'RepositoryNotFound':
            raise Exception(result)
        commits = parsed_result.get('commits')
        return commits[0]['revision'] if commits else None
class SVNRepository(Repository):
    """Fetches commits one revision at a time from a Subversion repository by
    shelling out to 'svn log --xml'."""

    def __init__(self, name, svn_url, should_trust_certificate, use_server_auth, account_name_script_path):
        assert not account_name_script_path or isinstance(account_name_script_path, list)
        super(SVNRepository, self).__init__(name)
        self._svn_url = svn_url
        self._should_trust_certificate = should_trust_certificate
        self._use_server_auth = use_server_auth
        self._account_name_script_path = account_name_script_path

    def fetch_next_commit(self, server_config, last_fetched):
        """Return the commit dictionary for the revision after last_fetched, or
        None when that revision does not exist yet."""
        if not last_fetched:
            # FIXME: This is a problematic if dashboard can get results for revisions older than oldest_revision
            # in the future because we never refetch older revisions.
            last_fetched = self.fetch_revision_from_dasbhoard(server_config, 'oldest')

        revision_to_fetch = int(last_fetched) + 1

        command = ['svn', 'log', '--revision', str(revision_to_fetch), '--xml', self._svn_url, '--non-interactive']
        if self._use_server_auth and 'auth' in server_config['server']:
            credentials = server_config['server']['auth']
            command += ['--no-auth-cache', '--username', credentials['username'], '--password', credentials['password']]
        if self._should_trust_certificate:
            command += ['--trust-server-cert']

        try:
            log_output = subprocess.check_output(command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as error:
            # svn reports this message when the revision does not exist yet.
            if (': No such revision ' + str(revision_to_fetch)) in error.output:
                return None
            raise error

        document = parseXmlString(log_output)
        time = text_content(document.getElementsByTagName("date")[0])
        author_elements = document.getElementsByTagName("author")
        author_account = text_content(author_elements[0]) if author_elements.length else None
        message = text_content(document.getElementsByTagName("msg")[0])
        name = self._resolve_author_name(author_account) if author_account and self._account_name_script_path else None

        commit = {
            'repository': self._name,
            'revision': revision_to_fetch,
            'time': time,
            'message': message,
        }
        if author_account:
            commit['author'] = {'account': author_account, 'name': name}
        return commit

    def _resolve_author_name(self, account):
        """Run the configured resolver script to map an SVN account to a person's
        name; returns None when the script fails."""
        try:
            script_output = subprocess.check_output(self._account_name_script_path + [account])
        except subprocess.CalledProcessError:
            print('Failed to resolve the name for account: %s' % account)
            return None
        match = Repository._name_account_compound_regex.match(script_output)
        if not match:
            return script_output.strip()
        return match.group('name').strip('"')

    def format_revision(self, revision):
        return 'r%s' % revision
class GitRepository(Repository):
    """Fetches commits from a local git checkout, optionally mapping hashes to SVN
    revisions via git-svn and/or extracting revision identifiers from commit messages."""

    def __init__(self, name, git_checkout, git_url, git_branch=None, report_revision_identifier_in_commit_msg=False, report_svn_revison=False):
        # NOTE: 'report_svn_revison' keeps its historical misspelling because
        # callers pass it by keyword.
        assert(os.path.isdir(git_checkout))
        super(GitRepository, self).__init__(name)
        self._git_checkout = git_checkout
        self._git_url = git_url
        self._git_branch = git_branch
        # One token list per commit, oldest first: [hash, commit-time, email, parent-hash].
        self._tokenized_hashes = []
        self._report_revision_identifier_in_commit_msg = report_revision_identifier_in_commit_msg
        self._report_svn_revision = report_svn_revison

    def fetch_next_commit(self, server_config, last_fetched):
        """Return the commit that follows last_fetched, or None when there is none."""
        if not last_fetched:
            self._fetch_all_hashes()
            # Bug fix: an empty or blank history used to raise IndexError here
            # (self._tokenized_hashes[0] on an empty list, or an empty token list
            # reaching _revision_from_tokens); treat it as "no commit" instead.
            tokens = self._tokenized_hashes[0] if self._tokenized_hashes else None
        else:
            if self._report_svn_revision:
                # last_fetched is an SVN revision; translate it back to a git hash,
                # re-fetching from the remote if the mapping is not known locally.
                last_fetched_git_hash = self._git_hash_from_svn_revision(last_fetched)
                if not last_fetched_git_hash:
                    self._fetch_remote()
                    last_fetched_git_hash = self._git_hash_from_svn_revision(last_fetched)
                    if not last_fetched_git_hash:
                        raise ValueError('Cannot find the git hash for the last fetched svn revision')
                last_fetched = last_fetched_git_hash
            tokens = self._find_next_hash(last_fetched)
            if not tokens:
                self._fetch_all_hashes()
                tokens = self._find_next_hash(last_fetched)
        if not tokens:
            return None
        return self._revision_from_tokens(tokens)

    def fetch_commit(self, server_config, hash_to_find):
        """Return the commit dictionary for hash_to_find from the cached history,
        or None when the hash is unknown."""
        assert(self._tokenized_hashes)
        # Bug fix (idiom): the index from enumerate() was never used.
        for tokens in self._tokenized_hashes:
            if tokens and tokens[0] == hash_to_find:
                return self._revision_from_tokens(tokens)
        return None

    def _svn_revision_from_git_hash(self, git_hash):
        # 'git svn find-rev' prints an empty string when the mapping is unknown.
        return self._run_git_command(['svn', 'find-rev', git_hash]).strip()

    def _git_hash_from_svn_revision(self, revision):
        return self._run_git_command(['svn', 'find-rev', 'r{}'.format(revision)]).strip()

    def _revision_from_tokens(self, tokens):
        """Build the commit dictionary submitted to the dashboard from one line of
        'git log --pretty=%H %ct %ce %P' output.

        Raises ValueError when a required revision identifier or SVN revision
        cannot be determined."""
        current_hash = tokens[0]
        commit_time = int(tokens[1])
        author_email = tokens[2]
        previous_hash = tokens[3] if len(tokens) >= 4 else None

        author_name = self._run_git_command(['log', current_hash, '-1', '--pretty=%cn'])
        message = self._run_git_command(['log', current_hash, '-1', '--pretty=%B'])

        revision_identifier = None
        if self._report_revision_identifier_in_commit_msg:
            revision_identifier_match = REVISION_IDENTIFIER_RE.search(message)
            if not revision_identifier_match:
                raise ValueError('Expected commit message to include revision identifier, but cannot find it, will need a history rewrite to fix it')
            revision_identifier = revision_identifier_match.group('revision_identifier')

        current_revision = current_hash
        previous_revision = previous_hash
        if self._report_svn_revision:
            current_revision = self._svn_revision_from_git_hash(current_hash)
            if not current_revision:
                raise ValueError('Cannot find SVN revison for {}'.format(current_hash))
            if previous_hash:
                previous_revision = self._svn_revision_from_git_hash(previous_hash)
                if not previous_revision:
                    raise ValueError('Cannot find SVN revison for {}'.format(previous_hash))

        return {
            'repository': self._name,
            'revision': current_revision,
            'revisionIdentifier': revision_identifier,
            'previousCommit': previous_revision,
            'time': datetime.utcfromtimestamp(commit_time).strftime(r'%Y-%m-%dT%H:%M:%S.%f'),
            'author': {'account': author_email, 'name': author_name},
            'message': message,
        }

    def _find_next_hash(self, hash_to_find):
        """Return the token list of the commit immediately after hash_to_find in
        the cached history, or None when it is absent or already the newest."""
        for i, tokens in enumerate(self._tokenized_hashes):
            if tokens and tokens[0] == hash_to_find:
                return self._tokenized_hashes[i + 1] if i + 1 < len(self._tokenized_hashes) else None
        return None

    def _fetch_remote(self):
        if self._report_svn_revision:
            self._run_git_command(['pull'])
            # Wipe the git-svn metadata so 'git svn fetch' rebuilds the
            # hash-to-revision mapping from the freshly pulled history.
            subprocess.check_call(['rm', '-rf', os.path.join(self._git_checkout, '.git/svn')])
            self._run_git_command(['svn', 'fetch'])
        else:
            self._run_git_command(['pull', self._git_url])

    def _fetch_all_hashes(self):
        """Refresh the checkout and cache the whole history, oldest commit first."""
        self._fetch_remote()
        scope = self._git_branch or '--all'
        lines = self._run_git_command(['log', scope, '--date-order', '--reverse', '--pretty=%H %ct %ce %P']).split('\n')
        self._tokenized_hashes = [line.split() for line in lines]

    def _run_git_command(self, args):
        return subprocess.check_output(['git', '-C', self._git_checkout] + args, stderr=subprocess.STDOUT)

    def format_revision(self, revision):
        # Git hashes are long; show only the first 8 characters in log messages.
        return str(revision)[0:8]
# Script entry point: hand the raw argv to main, which parses its own flags.
if __name__ == "__main__":
    main(sys.argv)