diff --git a/gm/rebaseline_server/compare_to_expectations.py b/gm/rebaseline_server/compare_to_expectations.py new file mode 100755 index 0000000000..f047b7b291 --- /dev/null +++ b/gm/rebaseline_server/compare_to_expectations.py @@ -0,0 +1,497 @@ +#!/usr/bin/python + +""" +Copyright 2013 Google Inc. + +Use of this source code is governed by a BSD-style license that can be +found in the LICENSE file. + +Repackage expected/actual GM results as needed by our HTML rebaseline viewer. +""" + +# System-level imports +import argparse +import fnmatch +import json +import logging +import os +import re +import sys +import time + +# Imports from within Skia +# +# TODO(epoger): Once we move the create_filepath_url() function out of +# download_actuals into a shared utility module, we won't need to import +# download_actuals anymore. +# +# We need to add the 'gm' directory, so that we can import gm_json.py within +# that directory. That script allows us to parse the actual-results.json file +# written out by the GM tool. +# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end* +# so any dirs that are already in the PYTHONPATH will be preferred. +PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) +GM_DIRECTORY = os.path.dirname(PARENT_DIRECTORY) +TRUNK_DIRECTORY = os.path.dirname(GM_DIRECTORY) +if GM_DIRECTORY not in sys.path: + sys.path.append(GM_DIRECTORY) +import download_actuals +import gm_json +import imagediffdb +import imagepair +import imagepairset +import results + +EXPECTATION_FIELDS_PASSED_THRU_VERBATIM = [ + results.KEY__EXPECTATIONS__BUGS, + results.KEY__EXPECTATIONS__IGNOREFAILURE, + results.KEY__EXPECTATIONS__REVIEWED, +] + +IMAGEPAIR_SET_DESCRIPTIONS = ('expected image', 'actual image') + +DEFAULT_ACTUALS_DIR = '.gm-actuals' +DEFAULT_EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm') +DEFAULT_GENERATED_IMAGES_ROOT = os.path.join( + PARENT_DIRECTORY, '.generated-images') + + +class Results(object): + """ Loads actual and expected GM results into an ImagePairSet. + + Loads actual and expected results from all builders, except for those skipped + by _ignore_builder(). + + Once this object has been constructed, the results (in self._results[]) + are immutable. If you want to update the results based on updated JSON + file contents, you will need to create a new Results object.""" + + def __init__(self, actuals_root=DEFAULT_ACTUALS_DIR, + expected_root=DEFAULT_EXPECTATIONS_DIR, + generated_images_root=DEFAULT_GENERATED_IMAGES_ROOT, + diff_base_url=None): + """ + Args: + actuals_root: root directory containing all actual-results.json files + expected_root: root directory containing all expected-results.json files + generated_images_root: directory within which to create all pixel diffs; + if this directory does not yet exist, it will be created + diff_base_url: base URL within which the client should look for diff + images; if not specified, defaults to a "file:///" URL representation + of generated_images_root + """ + time_start = int(time.time()) + self._image_diff_db = imagediffdb.ImageDiffDB(generated_images_root) + self._diff_base_url = ( + diff_base_url or + download_actuals.create_filepath_url(generated_images_root)) + self._actuals_root = actuals_root + self._expected_root = expected_root + self._load_actual_and_expected() + self._timestamp = int(time.time()) + logging.info('Results complete; took %d seconds.' % + (self._timestamp - time_start)) + + def get_timestamp(self): + """Return the time at which this object was created, in seconds past epoch + (UTC). + """ + return self._timestamp + + def edit_expectations(self, modifications): + """Edit the expectations stored within this object and write them back + to disk. + + Note that this will NOT update the results stored in self._results[] ; + in order to see those updates, you must instantiate a new Results object + based on the (now updated) files on disk. + + Args: + modifications: a list of dictionaries, one for each expectation to update: + + [ + { + imagepair.KEY__EXPECTATIONS_DATA: { + results.KEY__EXPECTATIONS__BUGS: [123, 456], + results.KEY__EXPECTATIONS__IGNOREFAILURE: false, + results.KEY__EXPECTATIONS__REVIEWED: true, + }, + imagepair.KEY__EXTRA_COLUMN_VALUES: { + results.KEY__EXTRACOLUMN__BUILDER: 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug', + results.KEY__EXTRACOLUMN__CONFIG: '8888', + results.KEY__EXTRACOLUMN__TEST: 'bigmatrix', + }, + results.KEY__NEW_IMAGE_URL: 'bitmap-64bitMD5/bigmatrix/10894408024079689926.png', + }, + ... + ] + + """ + expected_builder_dicts = Results._read_dicts_from_root(self._expected_root) + for mod in modifications: + image_name = results.IMAGE_FILENAME_FORMATTER % ( + mod[imagepair.KEY__EXTRA_COLUMN_VALUES] + [results.KEY__EXTRACOLUMN__TEST], + mod[imagepair.KEY__EXTRA_COLUMN_VALUES] + [results.KEY__EXTRACOLUMN__CONFIG]) + _, hash_type, hash_digest = gm_json.SplitGmRelativeUrl( + mod[results.KEY__NEW_IMAGE_URL]) + allowed_digests = [[hash_type, int(hash_digest)]] + new_expectations = { + gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests, + } + for field in EXPECTATION_FIELDS_PASSED_THRU_VERBATIM: + value = mod[imagepair.KEY__EXPECTATIONS_DATA].get(field) + if value is not None: + new_expectations[field] = value + builder_dict = expected_builder_dicts[ + mod[imagepair.KEY__EXTRA_COLUMN_VALUES] + [results.KEY__EXTRACOLUMN__BUILDER]] + builder_expectations = builder_dict.get(gm_json.JSONKEY_EXPECTEDRESULTS) + if not builder_expectations: + builder_expectations = {} + builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = builder_expectations + builder_expectations[image_name] = new_expectations + Results._write_dicts_to_root(expected_builder_dicts, self._expected_root) + + def get_results_of_type(self, results_type): + """Return results of some/all tests (depending on 'results_type' parameter). + + Args: + results_type: string describing which types of results to include; must + be one of the RESULTS_* constants + + Results are returned in a dictionary as output by ImagePairSet.as_dict(). + """ + return self._results[results_type] + + def get_packaged_results_of_type(self, results_type, reload_seconds=None, + is_editable=False, is_exported=True): + """ Package the results of some/all tests as a complete response_dict. + + Args: + results_type: string indicating which set of results to return; + must be one of the RESULTS_* constants + reload_seconds: if specified, note that new results may be available once + these results are reload_seconds old + is_editable: whether clients are allowed to submit new baselines + is_exported: whether these results are being made available to other + network hosts + """ + response_dict = self._results[results_type] + time_updated = self.get_timestamp() + response_dict[results.KEY__HEADER] = { + results.KEY__HEADER__SCHEMA_VERSION: ( + results.REBASELINE_SERVER_SCHEMA_VERSION_NUMBER), + + # Timestamps: + # 1. when this data was last updated + # 2. when the caller should check back for new data (if ever) + results.KEY__HEADER__TIME_UPDATED: time_updated, + results.KEY__HEADER__TIME_NEXT_UPDATE_AVAILABLE: ( + (time_updated+reload_seconds) if reload_seconds else None), + + # The type we passed to get_results_of_type() + results.KEY__HEADER__TYPE: results_type, + + # Hash of dataset, which the client must return with any edits-- + # this ensures that the edits were made to a particular dataset. + results.KEY__HEADER__DATAHASH: str(hash(repr( + response_dict[imagepairset.KEY__IMAGEPAIRS]))), + + # Whether the server will accept edits back. + results.KEY__HEADER__IS_EDITABLE: is_editable, + + # Whether the service is accessible from other hosts. + results.KEY__HEADER__IS_EXPORTED: is_exported, + } + return response_dict + + @staticmethod + def _ignore_builder(builder): + """Returns True if we should ignore expectations and actuals for a builder. + + This allows us to ignore builders for which we don't maintain expectations + (trybots, Valgrind, ASAN, TSAN), and avoid problems like + https://code.google.com/p/skia/issues/detail?id=2036 ('rebaseline_server + produces error when trying to add baselines for ASAN/TSAN builders') + + Args: + builder: name of this builder, as a string + + Returns: + True if we should ignore expectations and actuals for this builder. + """ + return (builder.endswith('-Trybot') or + ('Valgrind' in builder) or + ('TSAN' in builder) or + ('ASAN' in builder)) + + @staticmethod + def _read_dicts_from_root(root, pattern='*.json'): + """Read all JSON dictionaries within a directory tree. + + Args: + root: path to root of directory tree + pattern: which files to read within root (fnmatch-style pattern) + + Returns: + A meta-dictionary containing all the JSON dictionaries found within + the directory tree, keyed by the builder name of each dictionary. + + Raises: + IOError if root does not refer to an existing directory + """ + if not os.path.isdir(root): + raise IOError('no directory found at path %s' % root) + meta_dict = {} + for dirpath, dirnames, filenames in os.walk(root): + for matching_filename in fnmatch.filter(filenames, pattern): + builder = os.path.basename(dirpath) + if Results._ignore_builder(builder): + continue + fullpath = os.path.join(dirpath, matching_filename) + meta_dict[builder] = gm_json.LoadFromFile(fullpath) + return meta_dict + + @staticmethod + def _create_relative_url(hashtype_and_digest, test_name): + """Returns the URL for this image, relative to GM_ACTUALS_ROOT_HTTP_URL. + + If we don't have a record of this image, returns None. + + Args: + hashtype_and_digest: (hash_type, hash_digest) tuple, or None if we + don't have a record of this image + test_name: string; name of the GM test that created this image + """ + if not hashtype_and_digest: + return None + return gm_json.CreateGmRelativeUrl( + test_name=test_name, + hash_type=hashtype_and_digest[0], + hash_digest=hashtype_and_digest[1]) + + @staticmethod + def _write_dicts_to_root(meta_dict, root, pattern='*.json'): + """Write all per-builder dictionaries within meta_dict to files under + the root path. + + Security note: this will only write to files that already exist within + the root path (as found by os.walk() within root), so we don't need to + worry about malformed content writing to disk outside of root. + However, the data written to those files is not double-checked, so it + could contain poisonous data. + + Args: + meta_dict: a builder-keyed meta-dictionary containing all the JSON + dictionaries we want to write out + root: path to root of directory tree within which to write files + pattern: which files to write within root (fnmatch-style pattern) + + Raises: + IOError if root does not refer to an existing directory + KeyError if the set of per-builder dictionaries written out was + different than expected + """ + if not os.path.isdir(root): + raise IOError('no directory found at path %s' % root) + actual_builders_written = [] + for dirpath, dirnames, filenames in os.walk(root): + for matching_filename in fnmatch.filter(filenames, pattern): + builder = os.path.basename(dirpath) + if Results._ignore_builder(builder): + continue + per_builder_dict = meta_dict.get(builder) + if per_builder_dict is not None: + fullpath = os.path.join(dirpath, matching_filename) + gm_json.WriteToFile(per_builder_dict, fullpath) + actual_builders_written.append(builder) + + # Check: did we write out the set of per-builder dictionaries we + # expected to? + expected_builders_written = sorted(meta_dict.keys()) + actual_builders_written.sort() + if expected_builders_written != actual_builders_written: + raise KeyError( + 'expected to write dicts for builders %s, but actually wrote them ' + 'for builders %s' % ( + expected_builders_written, actual_builders_written)) + + def _load_actual_and_expected(self): + """Loads the results of all tests, across all builders (based on the + files within self._actuals_root and self._expected_root), + and stores them in self._results. + """ + logging.info('Reading actual-results JSON files from %s...' % + self._actuals_root) + actual_builder_dicts = Results._read_dicts_from_root(self._actuals_root) + logging.info('Reading expected-results JSON files from %s...' % + self._expected_root) + expected_builder_dicts = Results._read_dicts_from_root(self._expected_root) + + all_image_pairs = imagepairset.ImagePairSet( + descriptions=IMAGEPAIR_SET_DESCRIPTIONS, + diff_base_url=self._diff_base_url) + failing_image_pairs = imagepairset.ImagePairSet( + descriptions=IMAGEPAIR_SET_DESCRIPTIONS, + diff_base_url=self._diff_base_url) + + all_image_pairs.ensure_extra_column_values_in_summary( + column_id=results.KEY__EXTRACOLUMN__RESULT_TYPE, values=[ + results.KEY__RESULT_TYPE__FAILED, + results.KEY__RESULT_TYPE__FAILUREIGNORED, + results.KEY__RESULT_TYPE__NOCOMPARISON, + results.KEY__RESULT_TYPE__SUCCEEDED, + ]) + failing_image_pairs.ensure_extra_column_values_in_summary( + column_id=results.KEY__EXTRACOLUMN__RESULT_TYPE, values=[ + results.KEY__RESULT_TYPE__FAILED, + results.KEY__RESULT_TYPE__FAILUREIGNORED, + results.KEY__RESULT_TYPE__NOCOMPARISON, + ]) + + builders = sorted(actual_builder_dicts.keys()) + num_builders = len(builders) + builder_num = 0 + for builder in builders: + builder_num += 1 + logging.info('Generating pixel diffs for builder #%d of %d, "%s"...' % + (builder_num, num_builders, builder)) + actual_results_for_this_builder = ( + actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS]) + for result_type in sorted(actual_results_for_this_builder.keys()): + results_of_this_type = actual_results_for_this_builder[result_type] + if not results_of_this_type: + continue + for image_name in sorted(results_of_this_type.keys()): + (test, config) = results.IMAGE_FILENAME_RE.match(image_name).groups() + actual_image_relative_url = Results._create_relative_url( + hashtype_and_digest=results_of_this_type[image_name], + test_name=test) + + # Default empty expectations; overwrite these if we find any real ones + expectations_per_test = None + expected_image_relative_url = None + expectations_dict = None + try: + expectations_per_test = ( + expected_builder_dicts + [builder][gm_json.JSONKEY_EXPECTEDRESULTS][image_name]) + # TODO(epoger): assumes a single allowed digest per test, which is + # fine; see https://code.google.com/p/skia/issues/detail?id=1787 + expected_image_hashtype_and_digest = ( + expectations_per_test + [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0]) + expected_image_relative_url = Results._create_relative_url( + hashtype_and_digest=expected_image_hashtype_and_digest, + test_name=test) + expectations_dict = {} + for field in EXPECTATION_FIELDS_PASSED_THRU_VERBATIM: + expectations_dict[field] = expectations_per_test.get(field) + except (KeyError, TypeError): + # There are several cases in which we would expect to find + # no expectations for a given test: + # + # 1. result_type == NOCOMPARISON + # There are no expectations for this test yet! + # + # 2. alternate rendering mode failures (e.g. serialized) + # In cases like + # https://code.google.com/p/skia/issues/detail?id=1684 + # ('tileimagefilter GM test failing in serialized render mode'), + # the gm-actuals will list a failure for the alternate + # rendering mode even though we don't have explicit expectations + # for the test (the implicit expectation is that it must + # render the same in all rendering modes). + # + # Don't log type 1, because it is common. + # Log other types, because they are rare and we should know about + # them, but don't throw an exception, because we need to keep our + # tools working in the meanwhile! + if result_type != results.KEY__RESULT_TYPE__NOCOMPARISON: + logging.warning('No expectations found for test: %s' % { + results.KEY__EXTRACOLUMN__BUILDER: builder, + results.KEY__EXTRACOLUMN__RESULT_TYPE: result_type, + 'image_name': image_name, + }) + + # If this test was recently rebaselined, it will remain in + # the 'failed' set of actuals until all the bots have + # cycled (although the expectations have indeed been set + # from the most recent actuals). Treat these as successes + # instead of failures. + # + # TODO(epoger): Do we need to do something similar in + # other cases, such as when we have recently marked a test + # as ignoreFailure but it still shows up in the 'failed' + # category? Maybe we should not rely on the result_type + # categories recorded within the gm_actuals AT ALL, and + # instead evaluate the result_type ourselves based on what + # we see in expectations vs actual checksum? + if expected_image_relative_url == actual_image_relative_url: + updated_result_type = results.KEY__RESULT_TYPE__SUCCEEDED + else: + updated_result_type = result_type + extra_columns_dict = { + results.KEY__EXTRACOLUMN__RESULT_TYPE: updated_result_type, + results.KEY__EXTRACOLUMN__BUILDER: builder, + results.KEY__EXTRACOLUMN__TEST: test, + results.KEY__EXTRACOLUMN__CONFIG: config, + } + try: + image_pair = imagepair.ImagePair( + image_diff_db=self._image_diff_db, + base_url=gm_json.GM_ACTUALS_ROOT_HTTP_URL, + imageA_relative_url=expected_image_relative_url, + imageB_relative_url=actual_image_relative_url, + expectations=expectations_dict, + extra_columns=extra_columns_dict) + all_image_pairs.add_image_pair(image_pair) + if updated_result_type != results.KEY__RESULT_TYPE__SUCCEEDED: + failing_image_pairs.add_image_pair(image_pair) + except Exception: + logging.exception('got exception while creating new ImagePair') + + self._results = { + results.KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(), + results.KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(), + } + + +def main(): + logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', + datefmt='%m/%d/%Y %H:%M:%S', + level=logging.INFO) + parser = argparse.ArgumentParser() + parser.add_argument( + '--actuals', default=DEFAULT_ACTUALS_DIR, + help='Directory containing all actual-result JSON files') + parser.add_argument( + '--expectations', default=DEFAULT_EXPECTATIONS_DIR, + help='Directory containing all expected-result JSON files; defaults to ' + '\'%(default)s\' .') + parser.add_argument( + '--outfile', required=True, + help='File to write result summary into, in JSON format.') + parser.add_argument( + '--results', default=results.KEY__HEADER__RESULTS_FAILURES, + help='Which result types to include. Defaults to \'%(default)s\'; ' + 'must be one of ' + + str([results.KEY__HEADER__RESULTS_FAILURES, + results.KEY__HEADER__RESULTS_ALL])) + parser.add_argument( + '--workdir', default=DEFAULT_GENERATED_IMAGES_ROOT, + help='Directory within which to download images and generate diffs; ' + 'defaults to \'%(default)s\' .') + args = parser.parse_args() + results = Results(actuals_root=args.actuals, + expected_root=args.expectations, + generated_images_root=args.workdir) + gm_json.WriteToFile( + results.get_packaged_results_of_type(results_type=args.results), + args.outfile) + + +if __name__ == '__main__': + main() diff --git a/gm/rebaseline_server/results_test.py b/gm/rebaseline_server/compare_to_expectations_test.py similarity index 87% rename from gm/rebaseline_server/results_test.py rename to gm/rebaseline_server/compare_to_expectations_test.py index 6d7b05fcd3..c0bf19003f 100755 --- a/gm/rebaseline_server/results_test.py +++ b/gm/rebaseline_server/compare_to_expectations_test.py @@ -6,7 +6,7 @@ Copyright 2013 Google Inc. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. -Test results.py +Test compare_to_expectations.py TODO(epoger): Create a command to update the expected results (in self._output_dir_expected) when appropriate. For now, you should: @@ -23,15 +23,16 @@ import sys # Imports from within Skia import base_unittest +import compare_to_expectations import results import gm_json # must import results first, so that gm_json will be in sys.path -class ResultsTest(base_unittest.TestCase): +class CompareToExpectationsTest(base_unittest.TestCase): def test_gm(self): """Process results of a GM run with the Results object.""" - results_obj = results.Results( + results_obj = compare_to_expectations.Results( actuals_root=os.path.join(self._input_dir, 'gm-actuals'), expected_root=os.path.join(self._input_dir, 'gm-expectations'), generated_images_root=self._temp_dir, @@ -49,7 +50,7 @@ def mock_get_timestamp(): def main(): - base_unittest.main(ResultsTest) + base_unittest.main(CompareToExpectationsTest) if __name__ == '__main__': diff --git a/gm/rebaseline_server/results.py b/gm/rebaseline_server/results.py index 66105a4921..aadb6a7851 100755 --- a/gm/rebaseline_server/results.py +++ b/gm/rebaseline_server/results.py @@ -10,21 +10,12 @@ Repackage expected/actual GM results as needed by our HTML rebaseline viewer. """ # System-level imports -import argparse -import fnmatch -import json -import logging import os import re import sys -import time # Imports from within Skia # -# TODO(epoger): Once we move the create_filepath_url() function out of -# download_actuals into a shared utility module, we won't need to import -# download_actuals anymore. -# # We need to add the 'gm' directory, so that we can import gm_json.py within # that directory. That script allows us to parse the actual-results.json file # written out by the GM tool. @@ -32,14 +23,9 @@ import time # so any dirs that are already in the PYTHONPATH will be preferred. PARENT_DIRECTORY = os.path.dirname(os.path.realpath(__file__)) GM_DIRECTORY = os.path.dirname(PARENT_DIRECTORY) -TRUNK_DIRECTORY = os.path.dirname(GM_DIRECTORY) if GM_DIRECTORY not in sys.path: sys.path.append(GM_DIRECTORY) -import download_actuals import gm_json -import imagediffdb -import imagepair -import imagepairset # Keys used to link an image to a particular GM test. # NOTE: Keep these in sync with static/constants.js @@ -68,454 +54,5 @@ KEY__RESULT_TYPE__FAILUREIGNORED = gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED KEY__RESULT_TYPE__NOCOMPARISON = gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON KEY__RESULT_TYPE__SUCCEEDED = gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED -EXPECTATION_FIELDS_PASSED_THRU_VERBATIM = [ - KEY__EXPECTATIONS__BUGS, - KEY__EXPECTATIONS__IGNOREFAILURE, - KEY__EXPECTATIONS__REVIEWED, -] - IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN) IMAGE_FILENAME_FORMATTER = '%s_%s.png' # pass in (testname, config) - -IMAGEPAIR_SET_DESCRIPTIONS = ('expected image', 'actual image') - -DEFAULT_ACTUALS_DIR = '.gm-actuals' -DEFAULT_EXPECTATIONS_DIR = os.path.join(TRUNK_DIRECTORY, 'expectations', 'gm') -DEFAULT_GENERATED_IMAGES_ROOT = os.path.join( - PARENT_DIRECTORY, '.generated-images') - - -class Results(object): - """ Loads actual and expected GM results into an ImagePairSet. - - Loads actual and expected results from all builders, except for those skipped - by _ignore_builder(). - - Once this object has been constructed, the results (in self._results[]) - are immutable. If you want to update the results based on updated JSON - file contents, you will need to create a new Results object.""" - - def __init__(self, actuals_root=DEFAULT_ACTUALS_DIR, - expected_root=DEFAULT_EXPECTATIONS_DIR, - generated_images_root=DEFAULT_GENERATED_IMAGES_ROOT, - diff_base_url=None): - """ - Args: - actuals_root: root directory containing all actual-results.json files - expected_root: root directory containing all expected-results.json files - generated_images_root: directory within which to create all pixel diffs; - if this directory does not yet exist, it will be created - diff_base_url: base URL within which the client should look for diff - images; if not specified, defaults to a "file:///" URL representation - of generated_images_root - """ - time_start = int(time.time()) - self._image_diff_db = imagediffdb.ImageDiffDB(generated_images_root) - self._diff_base_url = ( - diff_base_url or - download_actuals.create_filepath_url(generated_images_root)) - self._actuals_root = actuals_root - self._expected_root = expected_root - self._load_actual_and_expected() - self._timestamp = int(time.time()) - logging.info('Results complete; took %d seconds.' % - (self._timestamp - time_start)) - - def get_timestamp(self): - """Return the time at which this object was created, in seconds past epoch - (UTC). - """ - return self._timestamp - - def edit_expectations(self, modifications): - """Edit the expectations stored within this object and write them back - to disk. - - Note that this will NOT update the results stored in self._results[] ; - in order to see those updates, you must instantiate a new Results object - based on the (now updated) files on disk. - - Args: - modifications: a list of dictionaries, one for each expectation to update: - - [ - { - imagepair.KEY__EXPECTATIONS_DATA: { - KEY__EXPECTATIONS__BUGS: [123, 456], - KEY__EXPECTATIONS__IGNOREFAILURE: false, - KEY__EXPECTATIONS__REVIEWED: true, - }, - imagepair.KEY__EXTRA_COLUMN_VALUES: { - KEY__EXTRACOLUMN__BUILDER: 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug', - KEY__EXTRACOLUMN__CONFIG: '8888', - KEY__EXTRACOLUMN__TEST: 'bigmatrix', - }, - KEY__NEW_IMAGE_URL: 'bitmap-64bitMD5/bigmatrix/10894408024079689926.png', - }, - ... - ] - - """ - expected_builder_dicts = Results._read_dicts_from_root(self._expected_root) - for mod in modifications: - image_name = IMAGE_FILENAME_FORMATTER % ( - mod[imagepair.KEY__EXTRA_COLUMN_VALUES][KEY__EXTRACOLUMN__TEST], - mod[imagepair.KEY__EXTRA_COLUMN_VALUES][KEY__EXTRACOLUMN__CONFIG]) - _, hash_type, hash_digest = gm_json.SplitGmRelativeUrl( - mod[KEY__NEW_IMAGE_URL]) - allowed_digests = [[hash_type, int(hash_digest)]] - new_expectations = { - gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS: allowed_digests, - } - for field in EXPECTATION_FIELDS_PASSED_THRU_VERBATIM: - value = mod[imagepair.KEY__EXPECTATIONS_DATA].get(field) - if value is not None: - new_expectations[field] = value - builder_dict = expected_builder_dicts[ - mod[imagepair.KEY__EXTRA_COLUMN_VALUES][KEY__EXTRACOLUMN__BUILDER]] - builder_expectations = builder_dict.get(gm_json.JSONKEY_EXPECTEDRESULTS) - if not builder_expectations: - builder_expectations = {} - builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = builder_expectations - builder_expectations[image_name] = new_expectations - Results._write_dicts_to_root(expected_builder_dicts, self._expected_root) - - def get_results_of_type(self, results_type): - """Return results of some/all tests (depending on 'results_type' parameter). - - Args: - results_type: string describing which types of results to include; must - be one of the RESULTS_* constants - - Results are returned in a dictionary as output by ImagePairSet.as_dict(). - """ - return self._results[results_type] - - def get_packaged_results_of_type(self, results_type, reload_seconds=None, - is_editable=False, is_exported=True): - """ Package the results of some/all tests as a complete response_dict. - - Args: - results_type: string indicating which set of results to return; - must be one of the RESULTS_* constants - reload_seconds: if specified, note that new results may be available once - these results are reload_seconds old - is_editable: whether clients are allowed to submit new baselines - is_exported: whether these results are being made available to other - network hosts - """ - response_dict = self._results[results_type] - time_updated = self.get_timestamp() - response_dict[KEY__HEADER] = { - KEY__HEADER__SCHEMA_VERSION: REBASELINE_SERVER_SCHEMA_VERSION_NUMBER, - - # Timestamps: - # 1. when this data was last updated - # 2. when the caller should check back for new data (if ever) - KEY__HEADER__TIME_UPDATED: time_updated, - KEY__HEADER__TIME_NEXT_UPDATE_AVAILABLE: ( - (time_updated+reload_seconds) if reload_seconds else None), - - # The type we passed to get_results_of_type() - KEY__HEADER__TYPE: results_type, - - # Hash of dataset, which the client must return with any edits-- - # this ensures that the edits were made to a particular dataset. - KEY__HEADER__DATAHASH: str(hash(repr( - response_dict[imagepairset.KEY__IMAGEPAIRS]))), - - # Whether the server will accept edits back. - KEY__HEADER__IS_EDITABLE: is_editable, - - # Whether the service is accessible from other hosts. - KEY__HEADER__IS_EXPORTED: is_exported, - } - return response_dict - - @staticmethod - def _ignore_builder(builder): - """Returns True if we should ignore expectations and actuals for a builder. - - This allows us to ignore builders for which we don't maintain expectations - (trybots, Valgrind, ASAN, TSAN), and avoid problems like - https://code.google.com/p/skia/issues/detail?id=2036 ('rebaseline_server - produces error when trying to add baselines for ASAN/TSAN builders') - - Args: - builder: name of this builder, as a string - - Returns: - True if we should ignore expectations and actuals for this builder. - """ - return (builder.endswith('-Trybot') or - ('Valgrind' in builder) or - ('TSAN' in builder) or - ('ASAN' in builder)) - - @staticmethod - def _read_dicts_from_root(root, pattern='*.json'): - """Read all JSON dictionaries within a directory tree. - - Args: - root: path to root of directory tree - pattern: which files to read within root (fnmatch-style pattern) - - Returns: - A meta-dictionary containing all the JSON dictionaries found within - the directory tree, keyed by the builder name of each dictionary. - - Raises: - IOError if root does not refer to an existing directory - """ - if not os.path.isdir(root): - raise IOError('no directory found at path %s' % root) - meta_dict = {} - for dirpath, dirnames, filenames in os.walk(root): - for matching_filename in fnmatch.filter(filenames, pattern): - builder = os.path.basename(dirpath) - if Results._ignore_builder(builder): - continue - fullpath = os.path.join(dirpath, matching_filename) - meta_dict[builder] = gm_json.LoadFromFile(fullpath) - return meta_dict - - @staticmethod - def _create_relative_url(hashtype_and_digest, test_name): - """Returns the URL for this image, relative to GM_ACTUALS_ROOT_HTTP_URL. - - If we don't have a record of this image, returns None. - - Args: - hashtype_and_digest: (hash_type, hash_digest) tuple, or None if we - don't have a record of this image - test_name: string; name of the GM test that created this image - """ - if not hashtype_and_digest: - return None - return gm_json.CreateGmRelativeUrl( - test_name=test_name, - hash_type=hashtype_and_digest[0], - hash_digest=hashtype_and_digest[1]) - - @staticmethod - def _write_dicts_to_root(meta_dict, root, pattern='*.json'): - """Write all per-builder dictionaries within meta_dict to files under - the root path. - - Security note: this will only write to files that already exist within - the root path (as found by os.walk() within root), so we don't need to - worry about malformed content writing to disk outside of root. - However, the data written to those files is not double-checked, so it - could contain poisonous data. - - Args: - meta_dict: a builder-keyed meta-dictionary containing all the JSON - dictionaries we want to write out - root: path to root of directory tree within which to write files - pattern: which files to write within root (fnmatch-style pattern) - - Raises: - IOError if root does not refer to an existing directory - KeyError if the set of per-builder dictionaries written out was - different than expected - """ - if not os.path.isdir(root): - raise IOError('no directory found at path %s' % root) - actual_builders_written = [] - for dirpath, dirnames, filenames in os.walk(root): - for matching_filename in fnmatch.filter(filenames, pattern): - builder = os.path.basename(dirpath) - if Results._ignore_builder(builder): - continue - per_builder_dict = meta_dict.get(builder) - if per_builder_dict is not None: - fullpath = os.path.join(dirpath, matching_filename) - gm_json.WriteToFile(per_builder_dict, fullpath) - actual_builders_written.append(builder) - - # Check: did we write out the set of per-builder dictionaries we - # expected to? - expected_builders_written = sorted(meta_dict.keys()) - actual_builders_written.sort() - if expected_builders_written != actual_builders_written: - raise KeyError( - 'expected to write dicts for builders %s, but actually wrote them ' - 'for builders %s' % ( - expected_builders_written, actual_builders_written)) - - def _load_actual_and_expected(self): - """Loads the results of all tests, across all builders (based on the - files within self._actuals_root and self._expected_root), - and stores them in self._results. - """ - logging.info('Reading actual-results JSON files from %s...' % - self._actuals_root) - actual_builder_dicts = Results._read_dicts_from_root(self._actuals_root) - logging.info('Reading expected-results JSON files from %s...' % - self._expected_root) - expected_builder_dicts = Results._read_dicts_from_root(self._expected_root) - - all_image_pairs = imagepairset.ImagePairSet( - descriptions=IMAGEPAIR_SET_DESCRIPTIONS, - diff_base_url=self._diff_base_url) - failing_image_pairs = imagepairset.ImagePairSet( - descriptions=IMAGEPAIR_SET_DESCRIPTIONS, - diff_base_url=self._diff_base_url) - - all_image_pairs.ensure_extra_column_values_in_summary( - column_id=KEY__EXTRACOLUMN__RESULT_TYPE, values=[ - KEY__RESULT_TYPE__FAILED, - KEY__RESULT_TYPE__FAILUREIGNORED, - KEY__RESULT_TYPE__NOCOMPARISON, - KEY__RESULT_TYPE__SUCCEEDED, - ]) - failing_image_pairs.ensure_extra_column_values_in_summary( - column_id=KEY__EXTRACOLUMN__RESULT_TYPE, values=[ - KEY__RESULT_TYPE__FAILED, - KEY__RESULT_TYPE__FAILUREIGNORED, - KEY__RESULT_TYPE__NOCOMPARISON, - ]) - - builders = sorted(actual_builder_dicts.keys()) - num_builders = len(builders) - builder_num = 0 - for builder in builders: - builder_num += 1 - logging.info('Generating pixel diffs for builder #%d of %d, "%s"...' % - (builder_num, num_builders, builder)) - actual_results_for_this_builder = ( - actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS]) - for result_type in sorted(actual_results_for_this_builder.keys()): - results_of_this_type = actual_results_for_this_builder[result_type] - if not results_of_this_type: - continue - for image_name in sorted(results_of_this_type.keys()): - (test, config) = IMAGE_FILENAME_RE.match(image_name).groups() - actual_image_relative_url = Results._create_relative_url( - hashtype_and_digest=results_of_this_type[image_name], - test_name=test) - - # Default empty expectations; overwrite these if we find any real ones - expectations_per_test = None - expected_image_relative_url = None - expectations_dict = None - try: - expectations_per_test = ( - expected_builder_dicts - [builder][gm_json.JSONKEY_EXPECTEDRESULTS][image_name]) - # TODO(epoger): assumes a single allowed digest per test, which is - # fine; see https://code.google.com/p/skia/issues/detail?id=1787 - expected_image_hashtype_and_digest = ( - expectations_per_test - [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0]) - expected_image_relative_url = Results._create_relative_url( - hashtype_and_digest=expected_image_hashtype_and_digest, - test_name=test) - expectations_dict = {} - for field in EXPECTATION_FIELDS_PASSED_THRU_VERBATIM: - expectations_dict[field] = expectations_per_test.get(field) - except (KeyError, TypeError): - # There are several cases in which we would expect to find - # no expectations for a given test: - # - # 1. result_type == NOCOMPARISON - # There are no expectations for this test yet! - # - # 2. alternate rendering mode failures (e.g. serialized) - # In cases like - # https://code.google.com/p/skia/issues/detail?id=1684 - # ('tileimagefilter GM test failing in serialized render mode'), - # the gm-actuals will list a failure for the alternate - # rendering mode even though we don't have explicit expectations - # for the test (the implicit expectation is that it must - # render the same in all rendering modes). - # - # Don't log type 1, because it is common. - # Log other types, because they are rare and we should know about - # them, but don't throw an exception, because we need to keep our - # tools working in the meanwhile! - if result_type != KEY__RESULT_TYPE__NOCOMPARISON: - logging.warning('No expectations found for test: %s' % { - KEY__EXTRACOLUMN__BUILDER: builder, - KEY__EXTRACOLUMN__RESULT_TYPE: result_type, - 'image_name': image_name, - }) - - # If this test was recently rebaselined, it will remain in - # the 'failed' set of actuals until all the bots have - # cycled (although the expectations have indeed been set - # from the most recent actuals). Treat these as successes - # instead of failures. - # - # TODO(epoger): Do we need to do something similar in - # other cases, such as when we have recently marked a test - # as ignoreFailure but it still shows up in the 'failed' - # category? Maybe we should not rely on the result_type - # categories recorded within the gm_actuals AT ALL, and - # instead evaluate the result_type ourselves based on what - # we see in expectations vs actual checksum? - if expected_image_relative_url == actual_image_relative_url: - updated_result_type = KEY__RESULT_TYPE__SUCCEEDED - else: - updated_result_type = result_type - extra_columns_dict = { - KEY__EXTRACOLUMN__RESULT_TYPE: updated_result_type, - KEY__EXTRACOLUMN__BUILDER: builder, - KEY__EXTRACOLUMN__TEST: test, - KEY__EXTRACOLUMN__CONFIG: config, - } - try: - image_pair = imagepair.ImagePair( - image_diff_db=self._image_diff_db, - base_url=gm_json.GM_ACTUALS_ROOT_HTTP_URL, - imageA_relative_url=expected_image_relative_url, - imageB_relative_url=actual_image_relative_url, - expectations=expectations_dict, - extra_columns=extra_columns_dict) - all_image_pairs.add_image_pair(image_pair) - if updated_result_type != KEY__RESULT_TYPE__SUCCEEDED: - failing_image_pairs.add_image_pair(image_pair) - except Exception: - logging.exception('got exception while creating new ImagePair') - - self._results = { - KEY__HEADER__RESULTS_ALL: all_image_pairs.as_dict(), - KEY__HEADER__RESULTS_FAILURES: failing_image_pairs.as_dict(), - } - - -def main(): - logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', - datefmt='%m/%d/%Y %H:%M:%S', - level=logging.INFO) - parser = argparse.ArgumentParser() - parser.add_argument( - '--actuals', default=DEFAULT_ACTUALS_DIR, - help='Directory containing all actual-result JSON files') - parser.add_argument( - '--expectations', default=DEFAULT_EXPECTATIONS_DIR, - help='Directory containing all expected-result JSON files; defaults to ' - '\'%(default)s\' .') - parser.add_argument( - '--outfile', required=True, - help='File to write result summary into, in JSON format.') - parser.add_argument( - '--results', default=KEY__HEADER__RESULTS_FAILURES, - help='Which result types to include. Defaults to \'%(default)s\'; ' - 'must be one of ' + - str([KEY__HEADER__RESULTS_FAILURES, KEY__HEADER__RESULTS_ALL])) - parser.add_argument( - '--workdir', default=DEFAULT_GENERATED_IMAGES_ROOT, - help='Directory within which to download images and generate diffs; ' - 'defaults to \'%(default)s\' .') - args = parser.parse_args() - results = Results(actuals_root=args.actuals, - expected_root=args.expectations, - generated_images_root=args.workdir) - gm_json.WriteToFile( - results.get_packaged_results_of_type(results_type=args.results), - args.outfile) - - -if __name__ == '__main__': - main() diff --git a/gm/rebaseline_server/server.py b/gm/rebaseline_server/server.py index bf9b125e1f..46b99ff65a 100755 --- a/gm/rebaseline_server/server.py +++ b/gm/rebaseline_server/server.py @@ -44,6 +44,7 @@ import svn # Note: we import results under a different name, to avoid confusion with the # Server.results() property. See discussion at # https://codereview.chromium.org/195943004/diff/1/gm/rebaseline_server/server.py#newcode44 +import compare_to_expectations import imagepairset import results as results_mod @@ -66,7 +67,7 @@ KEY__EDITS__MODIFICATIONS = 'modifications' KEY__EDITS__OLD_RESULTS_HASH = 'oldResultsHash' KEY__EDITS__OLD_RESULTS_TYPE = 'oldResultsType' -DEFAULT_ACTUALS_DIR = results_mod.DEFAULT_ACTUALS_DIR +DEFAULT_ACTUALS_DIR = compare_to_expectations.DEFAULT_ACTUALS_DIR DEFAULT_ACTUALS_REPO_REVISION = 'HEAD' DEFAULT_ACTUALS_REPO_URL = 'http://skia-autogen.googlecode.com/svn/gm-actual' DEFAULT_PORT = 8888 @@ -233,10 +234,10 @@ class Server(object): if self._reload_seconds: logging.info( 'Updating expected GM results in %s by syncing Skia repo ...' % - results_mod.DEFAULT_EXPECTATIONS_DIR) + compare_to_expectations.DEFAULT_EXPECTATIONS_DIR) _run_command(['gclient', 'sync'], TRUNK_DIRECTORY) - self._results = results_mod.Results( + self._results = compare_to_expectations.Results( actuals_root=self._actuals_dir, generated_images_root=os.path.join( PARENT_DIRECTORY, STATIC_CONTENTS_SUBDIR, @@ -405,7 +406,7 @@ class HTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): # client and server apply # modifications to the same base) KEY__EDITS__MODIFICATIONS: [ - # as needed by results_mod.edit_expectations() + # as needed by compare_to_expectations.edit_expectations() ... ], } diff --git a/gm/rebaseline_server/testdata/outputs/expected/results_test.ResultsTest.test_gm/gm.json b/gm/rebaseline_server/testdata/outputs/expected/compare_to_expectations_test.CompareToExpectationsTest.test_gm/gm.json similarity index 100% rename from gm/rebaseline_server/testdata/outputs/expected/results_test.ResultsTest.test_gm/gm.json rename to gm/rebaseline_server/testdata/outputs/expected/compare_to_expectations_test.CompareToExpectationsTest.test_gm/gm.json