6132b436d8
Rather than rebuilding the ImageDiffDB from scratch every time we update expected/actual results, keep the same ImageDiffDB instance around and add image pairs to it. This makes updating results within a long-running server.py *much* faster, and tests out an idea I'm ruminating in https://goto.google.com/LongRunningImageDiffDBServer : we could run an ImageDiffDB server that developers could connect to from their locally running rebaseline_server instances, for much faster launch times. BUG=skia:2414 NOTRY=True R=rmistry@google.com Author: epoger@google.com Review URL: https://codereview.chromium.org/379563005
340 lines
13 KiB
Python
340 lines
13 KiB
Python
#!/usr/bin/python
|
|
|
|
"""
|
|
Copyright 2013 Google Inc.
|
|
|
|
Use of this source code is governed by a BSD-style license that can be
|
|
found in the LICENSE file.
|
|
|
|
Calulate differences between image pairs, and store them in a database.
|
|
"""
|
|
|
|
import contextlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import shutil
|
|
import sys
|
|
import tempfile
|
|
import urllib
|
|
|
|
# Set the PYTHONPATH to include the tools directory.
|
|
sys.path.append(
|
|
os.path.join(
|
|
os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
|
|
'tools'))
|
|
import find_run_binary
|
|
|
|
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')
|
|
|
|
DEFAULT_IMAGE_SUFFIX = '.png'
|
|
DEFAULT_IMAGES_SUBDIR = 'images'
|
|
|
|
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile('[^\w\-]')
|
|
|
|
RGBDIFFS_SUBDIR = 'diffs'
|
|
WHITEDIFFS_SUBDIR = 'whitediffs'
|
|
|
|
# Keys used within DiffRecord dictionary representations.
|
|
# NOTE: Keep these in sync with static/constants.js
|
|
KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
|
|
KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels'
|
|
KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
|
|
KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference'
|
|
|
|
|
|
class DiffRecord(object):
|
|
""" Record of differences between two images. """
|
|
|
|
def __init__(self, storage_root,
|
|
expected_image_url, expected_image_locator,
|
|
actual_image_url, actual_image_locator,
|
|
expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
|
|
actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
|
|
image_suffix=DEFAULT_IMAGE_SUFFIX):
|
|
"""Download this pair of images (unless we already have them on local disk),
|
|
and prepare a DiffRecord for them.
|
|
|
|
TODO(epoger): Make this asynchronously download images, rather than blocking
|
|
until the images have been downloaded and processed.
|
|
|
|
Args:
|
|
storage_root: root directory on local disk within which we store all
|
|
images
|
|
expected_image_url: file or HTTP url from which we will download the
|
|
expected image
|
|
expected_image_locator: a unique ID string under which we will store the
|
|
expected image within storage_root (probably including a checksum to
|
|
guarantee uniqueness)
|
|
actual_image_url: file or HTTP url from which we will download the
|
|
actual image
|
|
actual_image_locator: a unique ID string under which we will store the
|
|
actual image within storage_root (probably including a checksum to
|
|
guarantee uniqueness)
|
|
expected_images_subdir: the subdirectory expected images are stored in.
|
|
actual_images_subdir: the subdirectory actual images are stored in.
|
|
image_suffix: the suffix of images.
|
|
"""
|
|
expected_image_locator = _sanitize_locator(expected_image_locator)
|
|
actual_image_locator = _sanitize_locator(actual_image_locator)
|
|
|
|
# Download the expected/actual images, if we don't have them already.
|
|
# TODO(rmistry): Add a parameter that just tries to use already-present
|
|
# image files rather than downloading them.
|
|
expected_image_file = os.path.join(
|
|
storage_root, expected_images_subdir,
|
|
str(expected_image_locator) + image_suffix)
|
|
actual_image_file = os.path.join(
|
|
storage_root, actual_images_subdir,
|
|
str(actual_image_locator) + image_suffix)
|
|
try:
|
|
_download_file(expected_image_file, expected_image_url)
|
|
except Exception:
|
|
logging.exception('unable to download expected_image_url %s to file %s' %
|
|
(expected_image_url, expected_image_file))
|
|
raise
|
|
try:
|
|
_download_file(actual_image_file, actual_image_url)
|
|
except Exception:
|
|
logging.exception('unable to download actual_image_url %s to file %s' %
|
|
(actual_image_url, actual_image_file))
|
|
raise
|
|
|
|
# Get all diff images and values from skpdiff binary.
|
|
skpdiff_output_dir = tempfile.mkdtemp()
|
|
try:
|
|
skpdiff_summary_file = os.path.join(skpdiff_output_dir,
|
|
'skpdiff-output.json')
|
|
skpdiff_rgbdiff_dir = os.path.join(skpdiff_output_dir, 'rgbDiff')
|
|
skpdiff_whitediff_dir = os.path.join(skpdiff_output_dir, 'whiteDiff')
|
|
expected_img = os.path.join(storage_root, expected_images_subdir,
|
|
str(expected_image_locator) + image_suffix)
|
|
actual_img = os.path.join(storage_root, actual_images_subdir,
|
|
str(actual_image_locator) + image_suffix)
|
|
|
|
# TODO: Call skpdiff ONCE for all image pairs, instead of calling it
|
|
# repeatedly. This will allow us to parallelize a lot more work.
|
|
find_run_binary.run_command(
|
|
[SKPDIFF_BINARY, '-p', expected_img, actual_img,
|
|
'--jsonp', 'false',
|
|
'--output', skpdiff_summary_file,
|
|
'--differs', 'perceptual', 'different_pixels',
|
|
'--rgbDiffDir', skpdiff_rgbdiff_dir,
|
|
'--whiteDiffDir', skpdiff_whitediff_dir,
|
|
])
|
|
|
|
# Get information out of the skpdiff_summary_file.
|
|
with contextlib.closing(open(skpdiff_summary_file)) as fp:
|
|
data = json.load(fp)
|
|
|
|
# For now, we can assume there is only one record in the output summary,
|
|
# since we passed skpdiff only one pair of images.
|
|
record = data['records'][0]
|
|
self._width = record['width']
|
|
self._height = record['height']
|
|
# TODO: make max_diff_per_channel a tuple instead of a list, because the
|
|
# structure is meaningful (first element is red, second is green, etc.)
|
|
# See http://stackoverflow.com/a/626871
|
|
self._max_diff_per_channel = [
|
|
record['maxRedDiff'], record['maxGreenDiff'], record['maxBlueDiff']]
|
|
rgb_diff_path = record['rgbDiffPath']
|
|
white_diff_path = record['whiteDiffPath']
|
|
per_differ_stats = record['diffs']
|
|
for stats in per_differ_stats:
|
|
differ_name = stats['differName']
|
|
if differ_name == 'different_pixels':
|
|
self._num_pixels_differing = stats['pointsOfInterest']
|
|
elif differ_name == 'perceptual':
|
|
perceptual_similarity = stats['result']
|
|
|
|
# skpdiff returns the perceptual similarity; convert it to get the
|
|
# perceptual difference percentage.
|
|
# skpdiff outputs -1 if the images are different sizes. Treat any
|
|
# output that does not lie in [0, 1] as having 0% perceptual
|
|
# similarity.
|
|
if not 0 <= perceptual_similarity <= 1:
|
|
perceptual_similarity = 0
|
|
self._perceptual_difference = 100 - (perceptual_similarity * 100)
|
|
|
|
# Store the rgbdiff and whitediff images generated above.
|
|
diff_image_locator = _get_difference_locator(
|
|
expected_image_locator=expected_image_locator,
|
|
actual_image_locator=actual_image_locator)
|
|
basename = str(diff_image_locator) + image_suffix
|
|
_mkdir_unless_exists(os.path.join(storage_root, RGBDIFFS_SUBDIR))
|
|
_mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR))
|
|
# TODO: Modify skpdiff's behavior so we can tell it exactly where to
|
|
# write the image files into, rather than having to move them around
|
|
# after skpdiff writes them out.
|
|
shutil.copyfile(rgb_diff_path,
|
|
os.path.join(storage_root, RGBDIFFS_SUBDIR, basename))
|
|
shutil.copyfile(white_diff_path,
|
|
os.path.join(storage_root, WHITEDIFFS_SUBDIR, basename))
|
|
|
|
finally:
|
|
shutil.rmtree(skpdiff_output_dir)
|
|
|
|
# TODO(epoger): Use properties instead of getters throughout.
|
|
# See http://stackoverflow.com/a/6618176
|
|
def get_num_pixels_differing(self):
|
|
"""Returns the absolute number of pixels that differ."""
|
|
return self._num_pixels_differing
|
|
|
|
def get_percent_pixels_differing(self):
|
|
"""Returns the percentage of pixels that differ, as a float between
|
|
0 and 100 (inclusive)."""
|
|
return ((float(self._num_pixels_differing) * 100) /
|
|
(self._width * self._height))
|
|
|
|
def get_perceptual_difference(self):
|
|
"""Returns the perceptual difference percentage."""
|
|
return self._perceptual_difference
|
|
|
|
def get_max_diff_per_channel(self):
|
|
"""Returns the maximum difference between the expected and actual images
|
|
for each R/G/B channel, as a list."""
|
|
return self._max_diff_per_channel
|
|
|
|
def as_dict(self):
|
|
"""Returns a dictionary representation of this DiffRecord, as needed when
|
|
constructing the JSON representation."""
|
|
return {
|
|
KEY__DIFFERENCES__NUM_DIFF_PIXELS: self._num_pixels_differing,
|
|
KEY__DIFFERENCES__PERCENT_DIFF_PIXELS:
|
|
self.get_percent_pixels_differing(),
|
|
KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,
|
|
KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference,
|
|
}
|
|
|
|
|
|
class ImageDiffDB(object):
|
|
""" Calculates differences between image pairs, maintaining a database of
|
|
them for download."""
|
|
|
|
def __init__(self, storage_root):
|
|
"""
|
|
Args:
|
|
storage_root: string; root path within the DB will store all of its stuff
|
|
"""
|
|
self._storage_root = storage_root
|
|
|
|
# Dictionary of DiffRecords, keyed by (expected_image_locator,
|
|
# actual_image_locator) tuples.
|
|
self._diff_dict = {}
|
|
|
|
@property
|
|
def storage_root(self):
|
|
return self._storage_root
|
|
|
|
def add_image_pair(self,
|
|
expected_image_url, expected_image_locator,
|
|
actual_image_url, actual_image_locator):
|
|
"""Download this pair of images (unless we already have them on local disk),
|
|
and prepare a DiffRecord for them.
|
|
|
|
TODO(epoger): Make this asynchronously download images, rather than blocking
|
|
until the images have been downloaded and processed.
|
|
When we do that, we should probably add a new method that will block
|
|
until all of the images have been downloaded and processed. Otherwise,
|
|
we won't know when it's safe to start calling get_diff_record().
|
|
jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
|
|
thread-pool/worker queue at a higher level that just uses ImageDiffDB?
|
|
|
|
Args:
|
|
expected_image_url: file or HTTP url from which we will download the
|
|
expected image
|
|
expected_image_locator: a unique ID string under which we will store the
|
|
expected image within storage_root (probably including a checksum to
|
|
guarantee uniqueness)
|
|
actual_image_url: file or HTTP url from which we will download the
|
|
actual image
|
|
actual_image_locator: a unique ID string under which we will store the
|
|
actual image within storage_root (probably including a checksum to
|
|
guarantee uniqueness)
|
|
"""
|
|
expected_image_locator = _sanitize_locator(expected_image_locator)
|
|
actual_image_locator = _sanitize_locator(actual_image_locator)
|
|
key = (expected_image_locator, actual_image_locator)
|
|
if not key in self._diff_dict:
|
|
try:
|
|
new_diff_record = DiffRecord(
|
|
self._storage_root,
|
|
expected_image_url=expected_image_url,
|
|
expected_image_locator=expected_image_locator,
|
|
actual_image_url=actual_image_url,
|
|
actual_image_locator=actual_image_locator)
|
|
except Exception:
|
|
# If we can't create a real DiffRecord for this (expected, actual) pair,
|
|
# store None and the UI will show whatever information we DO have.
|
|
# Fixes http://skbug.com/2368 .
|
|
logging.exception(
|
|
'got exception while creating a DiffRecord for '
|
|
'expected_image_url=%s , actual_image_url=%s; returning None' % (
|
|
expected_image_url, actual_image_url))
|
|
new_diff_record = None
|
|
self._diff_dict[key] = new_diff_record
|
|
|
|
def get_diff_record(self, expected_image_locator, actual_image_locator):
|
|
"""Returns the DiffRecord for this image pair.
|
|
|
|
Raises a KeyError if we don't have a DiffRecord for this image pair.
|
|
"""
|
|
key = (_sanitize_locator(expected_image_locator),
|
|
_sanitize_locator(actual_image_locator))
|
|
return self._diff_dict[key]
|
|
|
|
|
|
# Utility functions
|
|
|
|
def _download_file(local_filepath, url):
|
|
"""Download a file from url to local_filepath, unless it is already there.
|
|
|
|
Args:
|
|
local_filepath: path on local disk where the image should be stored
|
|
url: URL from which we can download the image if we don't have it yet
|
|
"""
|
|
if not os.path.exists(local_filepath):
|
|
_mkdir_unless_exists(os.path.dirname(local_filepath))
|
|
with contextlib.closing(urllib.urlopen(url)) as url_handle:
|
|
with open(local_filepath, 'wb') as file_handle:
|
|
shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
|
|
|
|
|
|
def _mkdir_unless_exists(path):
|
|
"""Unless path refers to an already-existing directory, create it.
|
|
|
|
Args:
|
|
path: path on local disk
|
|
"""
|
|
if not os.path.isdir(path):
|
|
os.makedirs(path)
|
|
|
|
|
|
def _sanitize_locator(locator):
|
|
"""Returns a sanitized version of a locator (one in which we know none of the
|
|
characters will have special meaning in filenames).
|
|
|
|
Args:
|
|
locator: string, or something that can be represented as a string
|
|
"""
|
|
return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
|
|
|
|
|
|
def _get_difference_locator(expected_image_locator, actual_image_locator):
|
|
"""Returns the locator string used to look up the diffs between expected_image
|
|
and actual_image.
|
|
|
|
We must keep this function in sync with getImageDiffRelativeUrl() in
|
|
static/loader.js
|
|
|
|
Args:
|
|
expected_image_locator: locator string pointing at expected image
|
|
actual_image_locator: locator string pointing at actual image
|
|
|
|
Returns: already-sanitized locator where the diffs between expected and
|
|
actual images can be found
|
|
"""
|
|
return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),
|
|
_sanitize_locator(actual_image_locator))
|