skia2/gm/rebaseline_server/imagediffdb.py

334 lines
13 KiB
Python

#!/usr/bin/python
"""
Copyright 2013 Google Inc.
Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.
Calculate differences between image pairs, and store them in a database.
"""
import contextlib
import json
import logging
import os
import re
import shutil
import sys
import tempfile
import urllib
# Set the PYTHONPATH to include the tools directory.
# The appended path is "<directory of this file>/../../tools", built from the
# symlink-resolved location of this file, so it does not depend on the
# current working directory.
sys.path.append(
os.path.join(
os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
'tools'))
# NOTE(review): find_run_binary is presumably provided by the tools directory
# added above, which is why this import has to come after the sys.path change.
import find_run_binary
# Looked up once, at import time, and passed as the program to run when
# DiffRecord invokes skpdiff.
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')
# Default file suffix and storage_root subdirectory for downloaded images.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Matches every character that is NOT a word character (letter, digit,
# underscore) or a dash.  Written as a raw string so that the backslashes
# reach the regex engine untouched instead of being parsed as (invalid)
# string escape sequences.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories of storage_root into which the diff images are written.
RGBDIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Keys used within DiffRecord dictionary representations.
# NOTE: Keep these in sync with static/constants.js
KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels'
KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference'
class DiffRecord(object):
  """Record of differences between two images.

  Constructing a DiffRecord downloads both images (if needed), runs the
  skpdiff binary on them, stores the resulting diff images under
  storage_root, and keeps the numeric results for later retrieval.
  """

  def __init__(self, storage_root,
               expected_image_url, expected_image_locator,
               actual_image_url, actual_image_locator,
               expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
               actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
               image_suffix=DEFAULT_IMAGE_SUFFIX):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.

    Args:
      storage_root: root directory on local disk within which we store all
          images
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
      expected_images_subdir: the subdirectory expected images are stored in.
      actual_images_subdir: the subdirectory actual images are stored in.
      image_suffix: the suffix of images.

    Raises:
      ValueError: if the skpdiff summary lacks the 'different_pixels' or
          'perceptual' results.
      Exception: anything raised while downloading the images, running
          skpdiff, or reading/copying its output is propagated (download
          failures are logged first).
    """
    expected_image_locator = _sanitize_locator(expected_image_locator)
    actual_image_locator = _sanitize_locator(actual_image_locator)

    # Download the expected/actual images, if we don't have them already.
    # TODO(rmistry): Add a parameter that just tries to use already-present
    # image files rather than downloading them.
    expected_image_file = os.path.join(
        storage_root, expected_images_subdir,
        str(expected_image_locator) + image_suffix)
    actual_image_file = os.path.join(
        storage_root, actual_images_subdir,
        str(actual_image_locator) + image_suffix)
    try:
      _download_file(expected_image_file, expected_image_url)
    except Exception:
      logging.exception('unable to download expected_image_url %s to file %s' %
                        (expected_image_url, expected_image_file))
      raise
    try:
      _download_file(actual_image_file, actual_image_url)
    except Exception:
      logging.exception('unable to download actual_image_url %s to file %s' %
                        (actual_image_url, actual_image_file))
      raise

    # Get all diff images and values from skpdiff binary.
    skpdiff_output_dir = tempfile.mkdtemp()
    try:
      skpdiff_summary_file = os.path.join(skpdiff_output_dir,
                                          'skpdiff-output.json')
      skpdiff_rgbdiff_dir = os.path.join(skpdiff_output_dir, 'rgbDiff')
      skpdiff_whitediff_dir = os.path.join(skpdiff_output_dir, 'whiteDiff')

      # TODO: Call skpdiff ONCE for all image pairs, instead of calling it
      # repeatedly. This will allow us to parallelize a lot more work.
      find_run_binary.run_command(
          [SKPDIFF_BINARY, '-p', expected_image_file, actual_image_file,
           '--jsonp', 'false',
           '--output', skpdiff_summary_file,
           '--differs', 'perceptual', 'different_pixels',
           '--rgbDiffDir', skpdiff_rgbdiff_dir,
           '--whiteDiffDir', skpdiff_whitediff_dir,
          ])

      # Get information out of the skpdiff_summary_file.
      with open(skpdiff_summary_file) as fp:
        data = json.load(fp)

      # For now, we can assume there is only one record in the output summary,
      # since we passed skpdiff only one pair of images.
      record = data['records'][0]
      self._width = record['width']
      self._height = record['height']
      # TODO: make max_diff_per_channel a tuple instead of a list, because the
      # structure is meaningful (first element is red, second is green, etc.)
      # See http://stackoverflow.com/a/626871
      self._max_diff_per_channel = [
          record['maxRedDiff'], record['maxGreenDiff'], record['maxBlueDiff']]
      rgb_diff_path = record['rgbDiffPath']
      white_diff_path = record['whiteDiffPath']

      # Start from None so that a summary lacking one of the differs is
      # detected right here, instead of surfacing much later as an
      # AttributeError inside a getter or as_dict().
      self._num_pixels_differing = None
      self._perceptual_difference = None
      for stats in record['diffs']:
        differ_name = stats['differName']
        if differ_name == 'different_pixels':
          self._num_pixels_differing = stats['pointsOfInterest']
        elif differ_name == 'perceptual':
          perceptual_similarity = stats['result']
          # skpdiff returns the perceptual similarity; convert it to get the
          # perceptual difference percentage.
          # skpdiff outputs -1 if the images are different sizes. Treat any
          # output that does not lie in [0, 1] as having 0% perceptual
          # similarity.
          if not 0 <= perceptual_similarity <= 1:
            perceptual_similarity = 0
          self._perceptual_difference = 100 - (perceptual_similarity * 100)
      if (self._num_pixels_differing is None or
          self._perceptual_difference is None):
        raise ValueError(
            'skpdiff summary for %s vs %s lacks different_pixels and/or '
            'perceptual results' % (expected_image_file, actual_image_file))

      # Store the rgbdiff and whitediff images generated above.
      diff_image_locator = _get_difference_locator(
          expected_image_locator=expected_image_locator,
          actual_image_locator=actual_image_locator)
      basename = str(diff_image_locator) + image_suffix
      _mkdir_unless_exists(os.path.join(storage_root, RGBDIFFS_SUBDIR))
      _mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR))
      # TODO: Modify skpdiff's behavior so we can tell it exactly where to
      # write the image files into, rather than having to move them around
      # after skpdiff writes them out.
      shutil.copyfile(rgb_diff_path,
                      os.path.join(storage_root, RGBDIFFS_SUBDIR, basename))
      shutil.copyfile(white_diff_path,
                      os.path.join(storage_root, WHITEDIFFS_SUBDIR, basename))
    finally:
      # Always discard skpdiff's scratch directory, even on failure.
      shutil.rmtree(skpdiff_output_dir)

  def get_num_pixels_differing(self):
    """Returns the absolute number of pixels that differ."""
    return self._num_pixels_differing

  def get_percent_pixels_differing(self):
    """Returns the percentage of pixels that differ, as a float between
    0 and 100 (inclusive).

    A zero-area image has no pixels that could differ, so 0.0 is returned in
    that case rather than dividing by zero.
    """
    total_pixels = self._width * self._height
    if not total_pixels:
      return 0.0
    return (float(self._num_pixels_differing) * 100) / total_pixels

  def get_perceptual_difference(self):
    """Returns the perceptual difference percentage."""
    return self._perceptual_difference

  def get_max_diff_per_channel(self):
    """Returns the maximum difference between the expected and actual images
    for each R/G/B channel, as a list."""
    return self._max_diff_per_channel

  def as_dict(self):
    """Returns a dictionary representation of this DiffRecord, as needed when
    constructing the JSON representation."""
    return {
        KEY__DIFFERENCES__NUM_DIFF_PIXELS: self._num_pixels_differing,
        KEY__DIFFERENCES__PERCENT_DIFF_PIXELS:
            self.get_percent_pixels_differing(),
        KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,
        KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference,
    }
class ImageDiffDB(object):
  """Calculates differences between image pairs and keeps one DiffRecord per
  pair, keyed by the pair's sanitized locators."""

  def __init__(self, storage_root):
    """
    Args:
      storage_root: string; root path within which the DB will store all of
          its stuff
    """
    # Maps (expected_image_locator, actual_image_locator) tuples to the
    # DiffRecord for that pair, or to None if the record could not be created.
    self._diff_dict = {}
    self._storage_root = storage_root

  def add_image_pair(self,
                     expected_image_url, expected_image_locator,
                     actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    A pair that has already been added is left untouched.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.
    When we do that, we should probably add a new method that will block
    until all of the images have been downloaded and processed. Otherwise,
    we won't know when it's safe to start calling get_diff_record().
    jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    thread-pool/worker queue at a higher level that just uses ImageDiffDB?

    Args:
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    clean_expected = _sanitize_locator(expected_image_locator)
    clean_actual = _sanitize_locator(actual_image_locator)
    pair_key = (clean_expected, clean_actual)
    if pair_key in self._diff_dict:
      return

    try:
      diff_record = DiffRecord(
          self._storage_root,
          expected_image_url=expected_image_url,
          expected_image_locator=clean_expected,
          actual_image_url=actual_image_url,
          actual_image_locator=clean_actual)
    except Exception:
      # If we can't create a real DiffRecord for this (expected, actual) pair,
      # store None and the UI will show whatever information we DO have.
      # Fixes http://skbug.com/2368 .
      logging.exception(
          'got exception while creating a DiffRecord for '
          'expected_image_url=%s , actual_image_url=%s; returning None' % (
              expected_image_url, actual_image_url))
      diff_record = None
    self._diff_dict[pair_key] = diff_record

  def get_diff_record(self, expected_image_locator, actual_image_locator):
    """Returns the DiffRecord for this image pair.

    The stored value is None when the record could not be created.

    Raises a KeyError if we don't have a DiffRecord for this image pair.
    """
    clean_expected = _sanitize_locator(expected_image_locator)
    clean_actual = _sanitize_locator(actual_image_locator)
    return self._diff_dict[(clean_expected, clean_actual)]
# Utility functions
def _download_file(local_filepath, url):
  """Download a file from url to local_filepath, unless it is already there.

  The payload is written to a sibling '<local_filepath>.partial' file and only
  renamed to local_filepath once the transfer has finished.  A failed or
  interrupted download therefore never leaves a truncated file behind that a
  later call would mistake for an already-downloaded image.

  Args:
    local_filepath: path on local disk where the image should be stored
    url: URL from which we can download the image if we don't have it yet

  Raises:
    IOError: if the server answers with an HTTP error status (>= 400).
    Any exception raised by urllib or by the filesystem operations is
    propagated unchanged.
  """
  if os.path.exists(local_filepath):
    return

  _mkdir_unless_exists(os.path.dirname(local_filepath))
  partial_filepath = local_filepath + '.partial'
  try:
    with contextlib.closing(urllib.urlopen(url)) as url_handle:
      # urllib.urlopen() does not raise for HTTP errors such as 404; it hands
      # back the error page as if it were the payload.  getcode() returns None
      # for non-HTTP (e.g. file:) URLs, which we let through.
      status = url_handle.getcode()
      if status is not None and status >= 400:
        raise IOError('HTTP status %s while downloading %s' % (status, url))
      with open(partial_filepath, 'wb') as file_handle:
        shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
    os.rename(partial_filepath, local_filepath)
  finally:
    # After a successful rename the partial file no longer exists; after a
    # failure this removes whatever incomplete data was written.
    if os.path.exists(partial_filepath):
      os.remove(partial_filepath)
def _mkdir_unless_exists(path):
  """Unless path refers to an already-existing directory, create it.

  Missing parent directories are created as well.

  Args:
    path: path on local disk

  Raises:
    OSError: if the directory could not be created and still does not exist
        afterwards (for example because path names an existing regular file).
  """
  try:
    os.makedirs(path)
  except OSError:
    # Attempt first and check afterwards: checking os.path.isdir() before
    # creating would leave a window in which another process or thread could
    # create the directory and make makedirs() fail.  The error only matters
    # if the directory is still missing.
    if not os.path.isdir(path):
      raise
def _sanitize_locator(locator):
  """Returns a sanitized version of a locator (one in which we know none of the
  characters will have special meaning in filenames).

  Every character matched by DISALLOWED_FILEPATH_CHAR_REGEX is replaced with
  an underscore.

  Args:
    locator: string, or something that can be represented as a string
  """
  locator_text = str(locator)
  sanitized = DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', locator_text)
  return sanitized
def _get_difference_locator(expected_image_locator, actual_image_locator):
  """Returns the locator string used to look up the diffs between expected_image
  and actual_image.

  We must keep this function in sync with getImageDiffRelativeUrl() in
  static/loader.js

  Args:
    expected_image_locator: locator string pointing at expected image
    actual_image_locator: locator string pointing at actual image

  Returns: already-sanitized locator where the diffs between expected and
      actual images can be found
  """
  expected_part = _sanitize_locator(expected_image_locator)
  actual_part = _sanitize_locator(actual_image_locator)
  return "%s-vs-%s" % (expected_part, actual_part)