require 'cgi' require 'diff' require 'open3' require 'open-uri' require 'pp' require 'set' require 'tempfile' module PrettyPatch public GIT_PATH = "git" def self.prettify(string) $last_prettify_file_count = -1 $last_prettify_part_count = { "remove" => 0, "add" => 0, "shared" => 0, "binary" => 0, "extract-error" => 0 } string = normalize_line_ending(string) str = "#{HEADER}\n" # Just look at the first line to see if it is an SVN revision number as added # by webkit-patch for git checkouts. $svn_revision = 0 string.each_line do |line| match = /^Subversion\ Revision: (\d*)$/.match(line) unless match.nil? str << "#{match[1]}\n" $svn_revision = match[1].to_i; end break end fileDiffs = FileDiff.parse(string) # Newly added images get two diffs with svn 1.7; toss the first one. deleteIndices = [] for i in 1...fileDiffs.length prev = i - 1 if fileDiffs[prev].image and not fileDiffs[prev].image_url and fileDiffs[i].image and fileDiffs[i].image_url and fileDiffs[prev].filename == fileDiffs[i].filename deleteIndices.unshift(prev) end end deleteIndices.each{ |i| fileDiffs.delete_at(i) } $last_prettify_file_count = fileDiffs.length str << fileDiffs.collect{ |diff| diff.to_html }.join str << "" end def self.filename_from_diff_header(line) DIFF_HEADER_FORMATS.each do |format| match = format.match(line) return match[1] unless match.nil? end nil end def self.diff_header?(line) RELAXED_DIFF_HEADER_FORMATS.any? { |format| line =~ format } end private DIFF_HEADER_FORMATS = [ /^Index: (.*)\r?$/, /^diff --git "?a\/.+"? 
"?b\/(.+)"?\r?$/, /^\+\+\+ ([^\t]+)(\t.*)?\r?$/ ] RELAXED_DIFF_HEADER_FORMATS = [ /^Index:/, /^diff/ ] RENAME_FROM = /^rename from (.*)/ SVN_BINARY_FILE_MARKER_FORMAT = /^Cannot display: file marked as a binary type.$/ SVN_IMAGE_FILE_MARKER_FORMAT = /^svn:mime-type = image\/png$/ SVN_PROPERTY_CHANGES_FORMAT = /^Property changes on: (.*)/ GIT_INDEX_MARKER_FORMAT = /^index ([0-9a-f]{40})\.\.([0-9a-f]{40})/ GIT_BINARY_FILE_MARKER_FORMAT = /^GIT binary patch$/ GIT_BINARY_PATCH_FORMAT = /^(literal|delta) \d+$/ GIT_LITERAL_FORMAT = /^literal \d+$/ GIT_DELTA_FORMAT = /^delta \d+$/ SVN_START_OF_BINARY_DATA_FORMAT = /^[0-9a-zA-Z\+\/=]{20,}/ # Assume 20 chars without a space is base64 binary data. START_OF_SECTION_FORMAT = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@\s*(.*)/ START_OF_EXTENT_STRING = "%c" % 0 END_OF_EXTENT_STRING = "%c" % 1 # We won't search for intra-line diffs in lines longer than this length, to avoid hangs. See . MAXIMUM_INTRALINE_DIFF_LINE_LENGTH = 10000 SMALLEST_EQUAL_OPERATION = 3 OPENSOURCE_TRAC_URL = "http://trac.webkit.org/" OPENSOURCE_DIRS = Set.new %w[ Examples LayoutTests PerformanceTests Source Tools WebKitLibraries Websites ] IMAGE_CHECKSUM_ERROR = "INVALID: Image lacks a checksum. This will fail with a MISSING error in run-webkit-tests. Always generate new png files using run-webkit-tests." 
# ---------------------------------------------------------------------------
# Remainder of the PrettyPatch module. The code below is unchanged; this
# comment summarises what the visible code defines, in order of appearance.
#
# normalize_line_ending(s)
#   On Ruby >= 1.9: forces s to UTF-8 (in place), round-trips it through
#   UTF-16 dropping invalid bytes, then rewrites "\r\n" and lone "\r" to "\n".
#   On older Rubies only the newline rewrite is performed.
#   NOTE(review): the transliteration table keys are single-quoted, so "\x.."
#   is not interpreted as a byte escape there - confirm this is intended.
# find_url_and_path(file_path)
#   Walks the directory components from the innermost outwards and returns
#   [OPENSOURCE_TRAC_URL, path] at the first component found in
#   OPENSOURCE_DIRS, otherwise [nil, file_path].
# linkifyFilename(filename, force)
#   As visible here every branch yields just the file name text.
#   NOTE(review): link markup appears to be missing from these literals.
# revisionOrDescription(string)
#   Returns the digits of "(revision N)", else the text between parentheses,
#   else nil.
# has_image_suffix(filename)
#   Truthy when filename ends in .png, .jpg or .gif.
# class FileDiff
#   One file's part of the patch. #initialize scans the header lines
#   ("--- ", "+++ ", svn/git binary markers, git index line, "rename from")
#   and parses the remaining lines with DiffSection.parse unless the file is
#   binary. FileDiff.parse starts a new FileDiff at each line accepted by
#   PrettyPatch.diff_header?, or at a "--- " line directly followed by a
#   "+++ " line when no header has been seen yet.
#   FileDiff.read_checksum_from_png returns the 32 hex digits that follow
#   "tEXtchecksum\0", or nil.
#   NOTE(review): File.exists? is used below; it was removed in Ruby 3.2
#   (File.exist? is the replacement) - confirm the target Ruby version.
# class DiffBlock / DiffBlockPart
#   A block holds parts; a part holds lines of one class name ("remove",
#   "add" or "shared") and bumps $last_prettify_part_count for that name.
# class DiffSection
#   One "@@" section: groups its lines into blocks/parts, numbers them, and
#   for runs with equally many removed and added lines computes intra-line
#   operations with HTMLDiff::DiffBuilder (skipped for lines longer than
#   MAXIMUM_INTRALINE_DIFF_LINE_LENGTH).
# class Line / CodeLine / ContextLine
#   A rendered line with optional from/to numbers; CodeLine additionally
#   renders its intra-line operations; ContextLine uses "@" for both numbers.
#
# NOTE(review): in this copy several string literals and heredocs look
# truncated (for example `HEADER =< EOF` and `return <= 2.3.3)`), and the
# definitions of some methods that are called below (for example
# run_git_apply_on_patch, get_new_temp_filepath_and_name,
# git_changed_file_binary_patch, download_from_revision_from_svn) are not
# visible. Restore them from the authoritative file before relying on this
# section.
# ---------------------------------------------------------------------------
def self.normalize_line_ending(s) if RUBY_VERSION >= "1.9" # Transliteration table from http://stackoverflow.com/a/6609998 transliteration_table = { '\xc2\x82' => ',', # High code comma '\xc2\x84' => ',,', # High code double comma '\xc2\x85' => '...', # Tripple dot '\xc2\x88' => '^', # High carat '\xc2\x91' => '\x27', # Forward single quote '\xc2\x92' => '\x27', # Reverse single quote '\xc2\x93' => '\x22', # Forward double quote '\xc2\x94' => '\x22', # Reverse double quote '\xc2\x95' => ' ', '\xc2\x96' => '-', # High hyphen '\xc2\x97' => '--', # Double hyphen '\xc2\x99' => ' ', '\xc2\xa0' => ' ', '\xc2\xa6' => '|', # Split vertical bar '\xc2\xab' => '<<', # Double less than '\xc2\xbb' => '>>', # Double greater than '\xc2\xbc' => '1/4', # one quarter '\xc2\xbd' => '1/2', # one half '\xc2\xbe' => '3/4', # three quarters '\xca\xbf' => '\x27', # c-single quote '\xcc\xa8' => '', # modifier - under curve '\xcc\xb1' => '' # modifier - under line } encoded_string = s.force_encoding('UTF-8').encode('UTF-16', :invalid => :replace, :replace => '', :fallback => transliteration_table).encode('UTF-8') encoded_string.gsub /\r\n?/, "\n" else s.gsub /\r\n?/, "\n" end end def self.find_url_and_path(file_path) # Search file_path from the bottom up, at each level checking whether # we've found a directory we know exists in the source tree. dirname, basename = File.split(file_path) dirname.split(/\//).reverse.inject(basename) do |path, directory| path = directory + "/" + path return [OPENSOURCE_TRAC_URL, path] if OPENSOURCE_DIRS.include?(directory) path end [nil, file_path] end def self.linkifyFilename(filename, force) if force "#{filename}" else url, pathBeneathTrunk = find_url_and_path(filename) url.nil? ? 
filename : "#{filename}" end end HEADER =< EOF def self.revisionOrDescription(string) case string when /\(revision \d+\)/ /\(revision (\d+)\)/.match(string)[1] when /\(.*\)/ /\((.*)\)/.match(string)[1] end end def self.has_image_suffix(filename) filename =~ /\.(png|jpg|gif)$/ end class FileDiff attr_reader :filename attr_reader :image attr_reader :image_url def initialize(lines) @filename = PrettyPatch.filename_from_diff_header(lines[0].chomp) startOfSections = 1 for i in 0...lines.length case lines[i] when /^--- / @from = PrettyPatch.revisionOrDescription(lines[i]) when /^\+\+\+ / @filename = PrettyPatch.filename_from_diff_header(lines[i].chomp) if @filename.nil? @to = PrettyPatch.revisionOrDescription(lines[i]) startOfSections = i + 1 # Check for 'property' patch, then image data, since svn 1.7 creates a fake patch for property changes. if /^$/.match(lines[startOfSections]) and SVN_PROPERTY_CHANGES_FORMAT.match(lines[startOfSections + 1]) then startOfSections += 2 for x in startOfSections...lines.length next if not /^$/.match(lines[x]) if SVN_START_OF_BINARY_DATA_FORMAT.match(lines[x + 1]) then startOfSections = x + 1 @binary = true @image = true break end end end break when SVN_BINARY_FILE_MARKER_FORMAT @binary = true if (SVN_IMAGE_FILE_MARKER_FORMAT.match(lines[i + 1]) or PrettyPatch.has_image_suffix(@filename)) then @image = true startOfSections = i + 2 for x in startOfSections...lines.length # Binary diffs often have property changes listed before the actual binary data. Skip them. 
if SVN_START_OF_BINARY_DATA_FORMAT.match(lines[x]) then startOfSections = x break end end end break when GIT_INDEX_MARKER_FORMAT @git_indexes = [$1, $2] when GIT_BINARY_FILE_MARKER_FORMAT @binary = true if (GIT_BINARY_PATCH_FORMAT.match(lines[i + 1]) and PrettyPatch.has_image_suffix(@filename)) then @git_image = true startOfSections = i + 1 end break when RENAME_FROM @renameFrom = RENAME_FROM.match(lines[i])[1] end end lines_with_contents = lines[startOfSections...lines.length] @sections = DiffSection.parse(lines_with_contents) unless @binary if @image and not lines_with_contents.empty? @image_url = "data:image/png;base64," + lines_with_contents.join @image_checksum = FileDiff.read_checksum_from_png(lines_with_contents.join.unpack("m").join) elsif @git_image begin raise "index line is missing" unless @git_indexes chunks = nil for i in 0...lines_with_contents.length if lines_with_contents[i] =~ /^$/ chunks = [lines_with_contents[i + 1 .. -1], lines_with_contents[0 .. i]] break end end raise "no binary chunks" unless chunks from_filepath = FileDiff.extract_contents_of_from_revision(@filename, chunks[0], @git_indexes[0]) to_filepath = FileDiff.extract_contents_of_to_revision(@filename, chunks[1], @git_indexes[1], from_filepath, @git_indexes[0]) filepaths = from_filepath, to_filepath binary_contents = filepaths.collect { |filepath| File.exists?(filepath) ? File.read(filepath) : nil } @image_urls = binary_contents.collect { |content| (content and not content.empty?) ? "data:image/png;base64," + [content].pack("m") : nil } @image_checksums = binary_contents.collect { |content| FileDiff.read_checksum_from_png(content) } rescue $last_prettify_part_count["extract-error"] += 1 @image_error = "Exception raised during decoding git binary patch:
#{CGI.escapeHTML($!.to_s + "\n" + $!.backtrace.join("\n"))}
" ensure File.unlink(from_filepath) if (from_filepath and File.exists?(from_filepath)) File.unlink(to_filepath) if (to_filepath and File.exists?(to_filepath)) end end nil end def image_to_html if not @image_url then return "Image file removed" end image_checksum = "" if @image_checksum image_checksum = @image_checksum elsif @filename.include? "-expected.png" and @image_url image_checksum = IMAGE_CHECKSUM_ERROR end return "

" + image_checksum + "

" end def to_html str = "
\n" if @renameFrom str += "

#{@filename}

" str += "was renamed from" str += "

#{PrettyPatch.linkifyFilename(@renameFrom.to_s, true)}

" else str += "

#{PrettyPatch.linkifyFilename(@filename, false)}

\n" end if @image then str += self.image_to_html elsif @git_image then if @image_error str += @image_error else for i in (0...2) image_url = @image_urls[i] image_checksum = @image_checksums[i] style = ["remove", "add"][i] str += "

" if image_checksum str += image_checksum elsif @filename.include? "-expected.png" and image_url str += IMAGE_CHECKSUM_ERROR end str += "
" if image_url str += "" else str += ["

Added", "

Removed"][i] end end end elsif @binary then $last_prettify_part_count["binary"] += 1 str += "Binary file, nothing to see here" else str += @sections.collect{ |section| section.to_html }.join("
\n") unless @sections.nil? end if @from then str += "" + @from + "" end str += "
\n" end def self.parse(string) haveSeenDiffHeader = false linesForDiffs = [] line_array = string.lines.to_a line_array.each_with_index do |line, index| if (PrettyPatch.diff_header?(line)) linesForDiffs << [] haveSeenDiffHeader = true elsif (!haveSeenDiffHeader && line =~ /^--- / && line_array[index + 1] =~ /^\+\+\+ /) linesForDiffs << [] haveSeenDiffHeader = false end linesForDiffs.last << line unless linesForDiffs.last.nil? end linesForDiffs.collect { |lines| FileDiff.new(lines) } end def self.read_checksum_from_png(png_bytes) # Ruby 1.9 added the concept of string encodings, so to avoid treating binary data as UTF-8, # we can force the encoding to binary at this point. if RUBY_VERSION >= "1.9" png_bytes.force_encoding('binary') end match = png_bytes && png_bytes.match(/tEXtchecksum\0([a-fA-F0-9]{32})/) match ? match[1] : nil end def self.git_new_file_binary_patch(filename, encoded_chunk, git_index) return <= 2.3.3) helpcmd = GIT_PATH + " help apply" stdin, stdout, stderr = *Open3.popen3(helpcmd) begin if stdout.read().include? "--unsafe-paths" cmd += " --unsafe-paths" end end cmd += " --directory=" + File.dirname(output_filepath) stdin, stdout, stderr = *Open3.popen3(cmd) begin stdin.puts(patch) stdin.close error = stderr.read if error != "" error = "Error running " + cmd + "\n" + "with patch:\n" + patch[0..500] + "...\n" + error end raise error if error != "" ensure stdin.close unless stdin.closed? 
stdout.close stderr.close end end def self.extract_contents_from_git_binary_literal_chunk(encoded_chunk, git_index) filepath, filename = get_new_temp_filepath_and_name patch = FileDiff.git_new_file_binary_patch(filename, encoded_chunk, git_index) run_git_apply_on_patch(filepath, patch) return filepath end def self.extract_contents_from_git_binary_delta_chunk(from_filepath, from_git_index, encoded_chunk, to_git_index) to_filepath, to_filename = get_new_temp_filepath_and_name from_filename = File.basename(from_filepath) patch = FileDiff.git_changed_file_binary_patch(to_filename, from_filename, encoded_chunk, to_git_index, from_git_index) run_git_apply_on_patch(to_filepath, patch) return to_filepath end def self.extract_contents_of_from_revision(repository_path, encoded_chunk, git_index) # For literal encoded, simply reconstruct. if GIT_LITERAL_FORMAT.match(encoded_chunk[0]) return extract_contents_from_git_binary_literal_chunk(encoded_chunk, git_index) end # For delta encoded, download from svn. if GIT_DELTA_FORMAT.match(encoded_chunk[0]) return download_from_revision_from_svn(repository_path) end raise "Error: unknown git patch encoding" end def self.extract_contents_of_to_revision(repository_path, encoded_chunk, git_index, from_filepath, from_git_index) # For literal encoded, simply reconstruct. if GIT_LITERAL_FORMAT.match(encoded_chunk[0]) return extract_contents_from_git_binary_literal_chunk(encoded_chunk, git_index) end # For delta encoded, reconstruct using delta and previously constructed 'from' revision. if GIT_DELTA_FORMAT.match(encoded_chunk[0]) return extract_contents_from_git_binary_delta_chunk(from_filepath, from_git_index, encoded_chunk, git_index) end raise "Error: unknown git patch encoding" end end class DiffBlock attr_accessor :parts def initialize(container) @parts = [] container << self end def to_html str = "
\n" str += @parts.collect{ |part| part.to_html }.join str += "
\n" end end class DiffBlockPart attr_reader :className attr :lines def initialize(className, container) $last_prettify_part_count[className] += 1 @className = className @lines = [] container.parts << self end def to_html str = "
\n" % @className str += @lines.collect{ |line| line.to_html }.join # Don't put white-space after this so adjacent inline-block DiffBlockParts will not wrap. str += "
" end end class DiffSection def initialize(lines) lines.length >= 1 or raise "DiffSection.parse only received %d lines" % lines.length matches = START_OF_SECTION_FORMAT.match(lines[0]) if matches from, to = [matches[1].to_i, matches[3].to_i] if matches[2] and matches[4] from_end = from + matches[2].to_i to_end = to + matches[4].to_i end end @blocks = [] diff_block = nil diff_block_part = nil for line in lines[1...lines.length] startOfLine = line =~ /^[-\+ ]/ ? 1 : 0 text = line[startOfLine...line.length].chomp case line[0] when ?- if (diff_block_part.nil? or diff_block_part.className != 'remove') diff_block = DiffBlock.new(@blocks) diff_block_part = DiffBlockPart.new('remove', diff_block) end diff_block_part.lines << CodeLine.new(from, nil, text) from += 1 unless from.nil? when ?+ if (diff_block_part.nil? or diff_block_part.className != 'add') # Put add lines that immediately follow remove lines into the same DiffBlock. if (diff_block.nil? or diff_block_part.className != 'remove') diff_block = DiffBlock.new(@blocks) end diff_block_part = DiffBlockPart.new('add', diff_block) end diff_block_part.lines << CodeLine.new(nil, to, text) to += 1 unless to.nil? else if (diff_block_part.nil? or diff_block_part.className != 'shared') diff_block = DiffBlock.new(@blocks) diff_block_part = DiffBlockPart.new('shared', diff_block) end diff_block_part.lines << CodeLine.new(from, to, text) from += 1 unless from.nil? to += 1 unless to.nil? end break if from_end and to_end and from == from_end and to == to_end end changes = [ [ [], [] ] ] for block in @blocks for block_part in block.parts for line in block_part.lines if (!line.fromLineNumber.nil? and !line.toLineNumber.nil?) then changes << [ [], [] ] next end changes.last.first << line if line.toLineNumber.nil? changes.last.last << line if line.fromLineNumber.nil? 
end end end for change in changes next unless change.first.length == change.last.length for i in (0...change.first.length) from_text = change.first[i].text to_text = change.last[i].text next if from_text.length > MAXIMUM_INTRALINE_DIFF_LINE_LENGTH or to_text.length > MAXIMUM_INTRALINE_DIFF_LINE_LENGTH raw_operations = HTMLDiff::DiffBuilder.new(from_text, to_text).operations operations = [] back = 0 raw_operations.each_with_index do |operation, j| if operation.action == :equal and j < raw_operations.length - 1 length = operation.end_in_new - operation.start_in_new if length < SMALLEST_EQUAL_OPERATION back = length next end end operation.start_in_old -= back operation.start_in_new -= back back = 0 operations << operation end change.first[i].operations = operations change.last[i].operations = operations end end @blocks.unshift(ContextLine.new(matches[5])) unless matches.nil? || matches[5].empty? end def to_html str = "
\n" str += @blocks.collect{ |block| block.to_html }.join str += "
\n" end def self.parse(lines) linesForSections = lines.inject([[]]) do |sections, line| sections << [] if line =~ /^@@/ sections.last << line sections end linesForSections.delete_if { |lines| lines.nil? or lines.empty? } linesForSections.collect { |lines| DiffSection.new(lines) } end end class Line attr_reader :fromLineNumber attr_reader :toLineNumber attr_reader :text def initialize(from, to, text) @fromLineNumber = from @toLineNumber = to @text = text end def text_as_html CGI.escapeHTML(text) end def classes lineClasses = ["Line", "LineContainer"] lineClasses << ["add"] unless @toLineNumber.nil? or !@fromLineNumber.nil? lineClasses << ["remove"] unless @fromLineNumber.nil? or !@toLineNumber.nil? lineClasses end def to_html markedUpText = self.text_as_html str = "
\n" % self.classes.join(' ') str += "%s%s" % [@fromLineNumber.nil? ? ' ' : @fromLineNumber, @toLineNumber.nil? ? ' ' : @toLineNumber] unless @fromLineNumber.nil? and @toLineNumber.nil? str += "%s\n" % markedUpText str += "
\n" end end class CodeLine < Line attr :operations, true def text_as_html html = [] tag = @fromLineNumber.nil? ? "ins" : "del" if @operations.nil? or @operations.empty? return CGI.escapeHTML(@text) end @operations.each do |operation| start = @fromLineNumber.nil? ? operation.start_in_new : operation.start_in_old eend = @fromLineNumber.nil? ? operation.end_in_new : operation.end_in_old escaped_text = CGI.escapeHTML(@text[start...eend]) if eend - start === 0 or operation.action === :equal html << escaped_text else html << "<#{tag}>#{escaped_text}" end end html.join end end class ContextLine < Line def initialize(context) super("@", "@", context) end def classes super << "context" end end end