From ac75d4340f2368683ad7b48ece4a8369cb055e2b Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Wed, 27 Sep 2023 13:09:16 +0200 Subject: [PATCH] gdalcompare.py: multiple enhancements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Multiple enhancements upstreamed from Planet's version: * RPC metadata comparison (can be disabled with SKIP_RPC option) * GEOLOCATION metadata comparison (disabled with SKIP_GEOLOCATION) * Fix comparing NaN nodata values * Dump differences of pixel content (enabled with DUMP_DIFFS option) * Improve comparison of float32/float64 data by not just checking checksum but also statistics * Overview comparison can be skipped with SKIP_OVERVIEWS * Add sanity check when comparing pixel-interleave datasets with more than 10 bands since that can be slow. - Binary comparison can be done on /vsi files now - Command line utility: add ``-dumpdiffs``, ``-skip_binary``, ``-skip_overviews``, ``-skip_geolocation``, ``-skip_geotransform``, ``-skip_metadata``, ``-skip_rpc``, and ``-skip_srs`` options - Add a test_gdalcompare.py script --- autotest/pyscripts/test_gdalcompare.py | 303 ++++++++++++++ doc/source/programs/gdalcompare.rst | 56 ++- .../gdal-utils/osgeo_utils/gdalcompare.py | 388 +++++++++++++----- 3 files changed, 647 insertions(+), 100 deletions(-) create mode 100644 autotest/pyscripts/test_gdalcompare.py diff --git a/autotest/pyscripts/test_gdalcompare.py b/autotest/pyscripts/test_gdalcompare.py new file mode 100644 index 000000000000..342b4d1be796 --- /dev/null +++ b/autotest/pyscripts/test_gdalcompare.py @@ -0,0 +1,303 @@ +#!/usr/bin/env pytest +# -*- coding: utf-8 -*- +############################################################################### +# $Id$ +# +# Project: GDAL/OGR Test Suite +# Purpose: gdalcompare.py testing +# Author: Even Rouault +# +############################################################################### +# Copyright (c) 2023, Even Rouault +# +# Permission is hereby granted, 
free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+############################################################################### + +import shutil + +import pytest +import test_py_scripts + +from osgeo import gdal +from osgeo_utils import gdalcompare + +pytestmark = pytest.mark.skipif( + test_py_scripts.get_py_script("gdalcompare") is None, + reason="gdalcompare not available", +) + + +@pytest.fixture() +def script_path(): + return test_py_scripts.get_py_script("gdalcompare") + + +@pytest.fixture() +def captured_print(): + def noop_print(*args, **kwargs): + pass + + ori_print = gdalcompare.my_print + gdalcompare.my_print = noop_print + try: + yield + finally: + gdalcompare.my_print = ori_print + + +@pytest.fixture() +def source_filename(tmp_vsimem): + filename = str(tmp_vsimem / "src.tif") + gdal.FileFromMemBuffer(filename, open("../gcore/data/byte.tif", "rb").read()) + return filename + + +############################################################################### + + +def test_gdalcompare_same(script_path, tmp_path): + + source_filename = str(tmp_path / "src.tif") + shutil.copy("../gcore/data/byte.tif", source_filename) + ret = test_py_scripts.run_py_script( + script_path, "gdalcompare", f"{source_filename} {source_filename}" + ) + assert "Differences Found: 0" in ret + + +############################################################################### + + +def test_gdalcompare_different_type(script_path, tmp_path): + + source_filename = str(tmp_path / "src.tif") + shutil.copy("../gcore/data/byte.tif", source_filename) + ret = test_py_scripts.run_py_script( + script_path, "gdalcompare", f"{source_filename} ../gcore/data/uint16.tif" + ) + assert "Differences Found: 0" not in ret + assert "Band 1 pixel types differ" in ret + + +############################################################################### + + +def test_gdalcompare_different_pixel_content( + tmp_vsimem, captured_print, source_filename +): + + golden_filename = source_filename + filename = str(tmp_vsimem / "new.tif") + gdal.Translate(filename, 
golden_filename, options="-scale 0 1 0 0") + assert ( + gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1 + ) + + prefix = str(tmp_vsimem / "") + gdalcompare.find_diff( + golden_filename, + filename, + options=["SKIP_BINARY", "DUMP_DIFFS", "DUMP_DIFFS_PREFIX=" + prefix], + ) + ds = gdal.Open(prefix + "1.tif") + assert ds.GetRasterBand(1).Checksum() == 4672 + + +############################################################################### + + +def test_gdalcompare_different_band_count(tmp_vsimem, captured_print, source_filename): + + golden_filename = source_filename + filename = str(tmp_vsimem / "new.tif") + gdal.Translate(filename, golden_filename, options="-b 1 -b 1") + assert ( + gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1 + ) + assert ( + gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 1 + ) + + +############################################################################### + + +def test_gdalcompare_different_dimension(tmp_vsimem, captured_print, source_filename): + + golden_filename = source_filename + filename = str(tmp_vsimem / "new.tif") + gdal.Translate(filename, golden_filename, options="-srcwin 0 0 20 19") + assert ( + gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1 + ) + + +############################################################################### + + +def test_gdalcompare_different_nodata(tmp_vsimem, captured_print, source_filename): + + golden_filename = "../gcore/data/byte.tif" + filename = str(tmp_vsimem / "new.tif") + gdal.Translate(filename, golden_filename, options="-a_nodata 1") + # diff_count = 2 because of mask flags as well + assert ( + gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 2 + ) + assert ( + gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 2 + ) + + +############################################################################### + + +def 
test_gdalcompare_different_nodata_nan(tmp_vsimem, captured_print, source_filename):
+
+    golden_filename = str(tmp_vsimem / "golden.tif")
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(
+        golden_filename, source_filename, options="-ot Float32 -a_nodata nan"
+    )
+    gdal.Translate(filename, source_filename, options="-ot Float32 -a_nodata 5")
+    assert (
+        gdalcompare.find_diff(golden_filename, golden_filename, options=["SKIP_BINARY"])
+        == 0
+    )
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 1
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_srs(tmp_vsimem, captured_print, source_filename):
+
+    golden_filename = source_filename
+    filename = str(tmp_vsimem / "new.tif")
+
+    gdal.Translate(filename, golden_filename, options="-a_srs EPSG:4326")
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(
+            golden_filename, filename, options=["SKIP_SRS", "SKIP_BINARY"]
+        )
+        == 0
+    )
+
+    ds = gdal.Translate(filename, golden_filename)
+    ds.SetSpatialRef(None)
+    ds = None
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) != 0
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) != 0
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_geotransform(
+    tmp_vsimem, captured_print, source_filename
+):
+
+    golden_filename = source_filename
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(filename, golden_filename, options="-a_ullr 0 1 1 0")
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(
+            golden_filename, filename, options=["SKIP_GEOTRANSFORM",
"SKIP_BINARY"] + ) + == 0 + ) + + ds = gdal.Translate(filename, golden_filename) + ds.SetGeoTransform([0, 0, 0, 0, 0, 0]) + ds = None + assert ( + gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) != 0 + ) + assert ( + gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) != 0 + ) + + +############################################################################### + + +def test_gdalcompare_different_metadata(tmp_vsimem, captured_print, source_filename): + + golden_filename = source_filename + filename = str(tmp_vsimem / "new.tif") + gdal.Translate(filename, golden_filename, options="-mo FOO=BAR") + assert ( + gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1 + ) + assert ( + gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 2 + ) + assert ( + gdalcompare.find_diff( + golden_filename, filename, options=["SKIP_METADATA", "SKIP_BINARY"] + ) + == 0 + ) + + +############################################################################### + + +def test_gdalcompare_different_overview(tmp_vsimem, captured_print, source_filename): + + golden_filename = str(tmp_vsimem / "golden.tif") + gdal.FileFromMemBuffer(golden_filename, open("../gcore/data/byte.tif", "rb").read()) + ds = gdal.Open(golden_filename, gdal.GA_Update) + ds.BuildOverviews("NEAR", [2]) + ds = None + assert ( + gdalcompare.find_diff(golden_filename, golden_filename, options=["SKIP_BINARY"]) + == 0 + ) + + filename = str(tmp_vsimem / "new.tif") + gdal.Translate(filename, source_filename) + assert ( + gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1 + ) + assert ( + gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 1 + ) + + ds = gdal.Translate(filename, source_filename) + ds.BuildOverviews("AVERAGE", [2]) + ds = None + assert ( + gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1 + ) diff --git a/doc/source/programs/gdalcompare.rst 
b/doc/source/programs/gdalcompare.rst
index 6037e1c3bb75..993d52668427 100644
--- a/doc/source/programs/gdalcompare.rst
+++ b/doc/source/programs/gdalcompare.rst
@@ -15,7 +15,12 @@ Synopsis
 
 .. code-block::
 
-    gdalcompare.py [--help] [--help-general] [-sds] golden_file new_file
+    gdalcompare.py [--help] [--help-general]
+                   [-dumpdiffs] [-skip_binary] [-skip_overviews]
+                   [-skip_geolocation] [-skip_geotransform]
+                   [-skip_metadata] [-skip_rpc] [-skip_srs]
+                   [-sds] <golden_file> <new_file>
+
 
 Description
 -----------
@@ -34,6 +39,55 @@ count of 1 (the binary difference) should be considered acceptable.
 
 .. include:: options/help_and_help_general.rst
 
+.. option:: -dumpdiffs
+
+    .. versionadded:: 3.8
+
+    Whether to output the difference in pixel content in a TIFF file in the
+    current directory.
+
+.. option:: -skip_binary
+
+    .. versionadded:: 3.8
+
+    Whether to skip exact comparison of binary content.
+
+.. option:: -skip_overviews
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of overviews.
+
+.. option:: -skip_geolocation
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of GEOLOCATION metadata domain.
+
+.. option:: -skip_geotransform
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of geotransform matrix.
+
+.. option:: -skip_metadata
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of metadata.
+
+.. option:: -skip_rpc
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of Rational Polynomial Coefficients (RPC) metadata domain.
+
+.. option:: -skip_srs
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of spatial reference systems (SRS).
+
 .. option:: -sds
 
     If this flag is passed the script will compare all subdatasets that
diff --git a/swig/python/gdal-utils/osgeo_utils/gdalcompare.py b/swig/python/gdal-utils/osgeo_utils/gdalcompare.py
index 8998427be2bb..e9dd6f8e206f 100644
--- a/swig/python/gdal-utils/osgeo_utils/gdalcompare.py
+++ b/swig/python/gdal-utils/osgeo_utils/gdalcompare.py
@@ -29,6 +29,7 @@
 # DEALINGS IN THE SOFTWARE.
# ****************************************************************************** +import array import filecmp import math import os @@ -39,29 +40,50 @@ ####################################################### from osgeo_utils.auxiliary.base import PathLikeOrStr +my_print = print -def compare_metadata(golden_md, new_md, ident, options=None): - # pylint: disable=unused-argument + +def compare_metadata(golden_md, new_md, md_id, options=None): if golden_md is None and new_md is None: return 0 found_diff = 0 - if len(list(golden_md.keys())) != len(list(new_md.keys())): - print("Difference in %s metadata key count" % ident) - print(" Golden Keys: " + str(list(golden_md.keys()))) - print(" New Keys: " + str(list(new_md.keys()))) + golden_keys = list(golden_md.keys()) + new_keys = list(new_md.keys()) + dont_care_keys = ["backend", "ERR_BIAS", "ERR_RAND"] + + for key in dont_care_keys: + if key in golden_keys: + golden_keys.remove(key) + if key in new_keys: + new_keys.remove(key) + + if len(golden_keys) != len(new_keys): + my_print("Difference in %s metadata key count" % md_id) + my_print(" Golden Keys: " + str(golden_keys)) + my_print(" New Keys: " + str(new_keys)) found_diff += 1 - for key in list(golden_md.keys()): - if key not in new_md: - print('New %s metadata lacks key "%s"' % (ident, key)) + for key in golden_keys: + if key not in new_keys: + my_print('New %s metadata lacks key "%s"' % (md_id, key)) + found_diff += 1 + elif md_id == "RPC" and new_md[key].strip() != golden_md[key].strip(): + # The strip above is because _RPC.TXT files and in-file have a difference + # in white space that is not otherwise meaningful. 
+            my_print('RPC Metadata value difference for key "' + key + '"')
+            my_print(' Golden: "' + golden_md[key] + '"')
+            my_print(' New: "' + new_md[key] + '"')
             found_diff += 1
-        elif new_md[key] != golden_md[key]:
-            print('Metadata value difference for key "' + key + '"')
-            print(' Golden: "' + golden_md[key] + '"')
-            print(' New: "' + new_md[key] + '"')
+        elif md_id != "RPC" and new_md[key] != golden_md[key]:
+            if key == "NITF_FDT":
+                # this will always have the current date set
+                continue
+            my_print('Metadata value difference for key "' + key + '"')
+            my_print(' Golden: "' + golden_md[key] + '"')
+            my_print(' New: "' + new_md[key] + '"')
             found_diff += 1
 
     return found_diff
@@ -69,107 +91,174 @@ def compare_metadata(golden_md, new_md, ident, options=None):
 
 #######################################################
 # Review and report on the actual image pixels that differ.
-def compare_image_pixels(golden_band, new_band, ident, options=None):
-    # pylint: disable=unused-argument
+def compare_image_pixels(golden_band, new_band, id, options=None):
 
     diff_count = 0
     max_diff = 0
 
+    out_db = None
+    if options and "DUMP_DIFFS" in options:
+        prefix = ""
+        for opt in options:
+            if opt.startswith("DUMP_DIFFS_PREFIX="):
+                prefix = opt[len("DUMP_DIFFS_PREFIX=") :]
+                break
+        diff_fn = prefix + id.replace(" ", "_") + ".tif"
+        out_db = gdal.GetDriverByName("GTiff").Create(
+            diff_fn, golden_band.XSize, golden_band.YSize, 1, gdal.GDT_Float32
+        )
+
+    xsize = golden_band.XSize
     for line in range(golden_band.YSize):
-        golden_line = golden_band.ReadAsArray(0, line, golden_band.XSize, 1)[0]
-        new_line = new_band.ReadAsArray(0, line, golden_band.XSize, 1)[0]
-        diff_line = golden_line.astype(float) - new_line.astype(float)
-        max_diff = max(max_diff, abs(diff_line).max())
-        diff_count += len(diff_line.nonzero()[0])
+        golden_line = array.array(
+            "d", golden_band.ReadRaster(0, line, xsize, 1, buf_type=gdal.GDT_Float64)
+        )
+        new_line = array.array(
+            "d", new_band.ReadRaster(0, line, xsize, 1,
buf_type=gdal.GDT_Float64) + ) + diff_line = [golden_line[i] - new_line[i] for i in range(xsize)] + max_diff_this_line = max([abs(x) for x in diff_line]) + max_diff = max(max_diff, max_diff_this_line) + if max_diff_this_line: + diff_count += sum([(1 if x else 0) for x in diff_line]) + if out_db is not None: + out_db.GetRasterBand(1).WriteRaster( + 0, + line, + xsize, + 1, + array.array("d", diff_line).tobytes(), + buf_type=gdal.GDT_Float64, + ) - print(" Pixels Differing: " + str(diff_count)) - print(" Maximum Pixel Difference: " + str(max_diff)) + my_print(" Pixels Differing: " + str(diff_count)) + my_print(" Maximum Pixel Difference: " + str(max_diff)) + if out_db is not None: + my_print(" Wrote Diffs to: %s" % diff_fn) ####################################################### -def compare_band(golden_band, new_band, ident, options=None): +def compare_band(golden_band, new_band, id, options=None): found_diff = 0 options = [] if options is None else options + if golden_band.XSize != new_band.XSize or golden_band.YSize != new_band.YSize: + my_print( + "Band size mismatch (band=%s golden=[%d,%d], new=[%d,%d])" + % (id, golden_band.XSize, golden_band.YSize, new_band.XSize, new_band.YSize) + ) + found_diff += 1 + if golden_band.DataType != new_band.DataType: - print("Band %s pixel types differ." % ident) - print(" Golden: " + gdal.GetDataTypeName(golden_band.DataType)) - print(" New: " + gdal.GetDataTypeName(new_band.DataType)) + my_print("Band %s pixel types differ." % id) + my_print(" Golden: " + gdal.GetDataTypeName(golden_band.DataType)) + my_print(" New: " + gdal.GetDataTypeName(new_band.DataType)) found_diff += 1 - golden_band_nodata = golden_band.GetNoDataValue() - new_band_nodata = new_band.GetNoDataValue() - if golden_band_nodata != new_band_nodata: - if golden_band_nodata and new_band_nodata: - if not (math.isnan(golden_band_nodata) and math.isnan(new_band_nodata)): - print("Band %s nodata values differ." 
% ident)
-                print(" Golden: " + str(golden_band_nodata))
-                print(" New: " + str(new_band_nodata))
-                found_diff += 1
-        else:
-            print("Band %s nodata values differ." % ident)
-            print(" Golden: " + str(golden_band_nodata))
-            print(" New: " + str(new_band_nodata))
-            found_diff += 1
+    golden_nodata = golden_band.GetNoDataValue()
+    new_nodata = new_band.GetNoDataValue()
+
+    # Two 'nan' values are _never_ equal, but bands that both use 'nan' as
+    # nodata value do in fact use the same nodata value. 'inf' and '-inf'
+    # each match only themselves. These checks are kind of gross, but are
+    # unavoidable since 'None' has to be accounted for. The reader might be
+    # tempted to simplify these checks with a couple of 'set()'s, however a
+    # set containing two 'nan' values has a length of 2, not 1.
+    if None not in (golden_nodata, new_nodata) and (
+        math.isnan(golden_nodata) and math.isnan(new_nodata)
+    ):
+        pass
+    elif None not in (golden_nodata, new_nodata) and (
+        math.isinf(golden_nodata) and golden_nodata == new_nodata
+    ):
+        pass
+    elif golden_nodata != new_nodata:
+        my_print("Band %s nodata values differ." % id)
+        my_print(" Golden: " + str(golden_nodata))
+        my_print(" New: " + str(new_nodata))
+        found_diff += 1
 
     if golden_band.GetColorInterpretation() != new_band.GetColorInterpretation():
-        print("Band %s color interpretation values differ." % ident)
-        print(
+        my_print("Band %s color interpretation values differ." % id)
+        my_print(
             " Golden: "
             + gdal.GetColorInterpretationName(golden_band.GetColorInterpretation())
         )
-        print(
+        my_print(
             " New: "
             + gdal.GetColorInterpretationName(new_band.GetColorInterpretation())
         )
         found_diff += 1
 
-    if golden_band.Checksum() != new_band.Checksum():
-        print("Band %s checksum difference:" % ident)
-        print(" Golden: " + str(golden_band.Checksum()))
-        print(" New: " + str(new_band.Checksum()))
-        found_diff += 1
-        compare_image_pixels(golden_band, new_band, ident, options)
-
-    # Check overviews
-    if golden_band.GetOverviewCount() != new_band.GetOverviewCount():
-        print("Band %s overview count difference:" % ident)
-        print(" Golden: " + str(golden_band.GetOverviewCount()))
-        print(" New: " + str(new_band.GetOverviewCount()))
+    golden_band_checksum = golden_band.Checksum()
+    new_band_checksum = new_band.Checksum()
+    if golden_band_checksum != new_band_checksum:
+        my_print("Band %s checksum difference:" % id)
+        my_print(" Golden: " + str(golden_band_checksum))
+        my_print(" New: " + str(new_band_checksum))
+        if found_diff == 0:
+            compare_image_pixels(golden_band, new_band, id, options)
         found_diff += 1
     else:
-        for i in range(golden_band.GetOverviewCount()):
-            found_diff += compare_band(
-                golden_band.GetOverview(i),
-                new_band.GetOverview(i),
-                ident + " overview " + str(i),
-                options,
-            )
+        # check a bit deeper in case of Float data type for which the Checksum() function is not reliable
+        if golden_band.DataType in (gdal.GDT_Float32, gdal.GDT_Float64):
+            if golden_band.ComputeRasterMinMax() != new_band.ComputeRasterMinMax():
+                my_print("Band %s statistics difference:" % id)
+                my_print(" Golden: " + str(golden_band.ComputeBandStats()))
+                my_print(" New: " + str(new_band.ComputeBandStats()))
+                compare_image_pixels(golden_band, new_band, id, {})
 
-    # Metadata
-    if "SKIP_METADATA" not in options:
-        found_diff += compare_metadata(
-            golden_band.GetMetadata(), new_band.GetMetadata(), "Band " + ident, options
-        )
+    # Check overviews
+    if "SKIP_OVERVIEWS" not
in options: + if golden_band.GetOverviewCount() != new_band.GetOverviewCount(): + my_print("Band %s overview count difference:" % id) + my_print(" Golden: " + str(golden_band.GetOverviewCount())) + my_print(" New: " + str(new_band.GetOverviewCount())) + found_diff += 1 + else: + for i in range(golden_band.GetOverviewCount()): + found_diff += compare_band( + golden_band.GetOverview(i), + new_band.GetOverview(i), + id + " overview " + str(i), + options, + ) # Mask band if golden_band.GetMaskFlags() != new_band.GetMaskFlags(): - print("Band %s mask flags difference:" % ident) - print(" Golden: " + str(golden_band.GetMaskFlags())) - print(" New: " + str(new_band.GetMaskFlags())) + my_print("Band %s mask flags difference:" % id) + my_print(" Golden: " + str(golden_band.GetMaskFlags())) + my_print(" New: " + str(new_band.GetMaskFlags())) found_diff += 1 - elif golden_band.GetMaskFlags() in (gdal.GMF_PER_DATASET, 0): + elif golden_band.GetMaskFlags() == gdal.GMF_PER_DATASET: + # Check mask band if it's GMF_PER_DATASET found_diff += compare_band( golden_band.GetMaskBand(), new_band.GetMaskBand(), - ident + " mask band", + id + " mask band", options, ) - # TODO: Color Table, gain/bias, units, blocksize, min/max + # Metadata + if "SKIP_METADATA" not in options: + found_diff += compare_metadata( + golden_band.GetMetadata(), new_band.GetMetadata(), "Band " + id, options + ) + + # Band Description - currently this is opt in since we have not + # been tracking this in the past. It would be nice to make it the + # default at some point. 
+ if "CHECK_BAND_DESC" in options: + if golden_band.GetDescription() != new_band.GetDescription(): + my_print("Band %s descriptions difference:" % id) + my_print(" Golden: " + str(golden_band.GetDescription())) + my_print(" New: " + str(new_band.GetDescription())) + found_diff += 1 + + # TODO: Color Table, gain/bias, units, blocksize, mask, min/max return found_diff @@ -181,20 +270,20 @@ def compare_srs(golden_wkt, new_wkt): if golden_wkt == new_wkt: return 0 - print("Difference in SRS!") + my_print("Difference in SRS!") golden_srs = osr.SpatialReference(golden_wkt) new_srs = osr.SpatialReference(new_wkt) if golden_srs.IsSame(new_srs): - print(" * IsSame() reports them as equivalent.") + my_print(" * IsSame() reports them as equivalent.") else: - print(" * IsSame() reports them as different.") + my_print(" * IsSame() reports them as different.") - print(" Golden:") - print(" " + golden_srs.ExportToPrettyWkt()) - print(" New:") - print(" " + new_srs.ExportToPrettyWkt()) + my_print(" Golden:") + my_print(" " + (golden_srs.ExportToPrettyWkt() if golden_wkt else "None")) + my_print(" New:") + my_print(" " + (new_srs.ExportToPrettyWkt() if new_wkt else "None")) return 1 @@ -207,6 +296,21 @@ def compare_db(golden_db, new_db, options=None): options = [] if options is None else options + # Comparisons are done per-band, so an image with 'INTERLEAVE=PIXEL' and a + # lot of bands will take hours to complete. + if "SKIP_INTERLEAVE_CHECK" not in options: + maxbands = 10 + interleave = golden_db.GetMetadata("IMAGE_STRUCTURE").get("INTERLEAVE", "") + if golden_db.RasterCount > maxbands and interleave.lower() == "pixel": + raise ValueError( + f"Golden file has more than {maxbands} and INTERLEAVE={interleave} - this" + f" check will eventually succeed but will take hours due to the" + f" amount of I/O required for per-band comparisons. 
Recommend" + f" testing image encoding directly in your test, and then" + f" translating to a band interleaved format before calling this" + f" method: {golden_db.GetDescription()}" + ) + # SRS if "SKIP_SRS" not in options: found_diff += compare_srs(golden_db.GetProjection(), new_db.GetProjection()) @@ -216,9 +320,9 @@ def compare_db(golden_db, new_db, options=None): golden_gt = golden_db.GetGeoTransform() new_gt = new_db.GetGeoTransform() if golden_gt != new_gt: - print("GeoTransforms Differ:") - print(" Golden: " + str(golden_gt)) - print(" New: " + str(new_gt)) + my_print("GeoTransforms Differ:") + my_print(" Golden: " + str(golden_gt)) + my_print(" New: " + str(new_gt)) found_diff += 1 # Metadata @@ -227,13 +331,27 @@ def compare_db(golden_db, new_db, options=None): golden_db.GetMetadata(), new_db.GetMetadata(), "Dataset", options ) + if "SKIP_RPC" not in options: + found_diff += compare_metadata( + golden_db.GetMetadata("RPC"), new_db.GetMetadata("RPC"), "RPC", options + ) + + if "SKIP_GEOLOCATION" not in options: + found_diff += compare_metadata( + golden_db.GetMetadata("GEOLOCATION"), + new_db.GetMetadata("GEOLOCATION"), + "GEOLOCATION", + options, + ) + # Bands if golden_db.RasterCount != new_db.RasterCount: - print( + my_print( "Band count mismatch (golden=%d, new=%d)" % (golden_db.RasterCount, new_db.RasterCount) ) found_diff += 1 + return found_diff # Dimensions for i in range(golden_db.RasterCount): @@ -243,7 +361,7 @@ def compare_db(golden_db, new_db, options=None): nSzY = new_db.GetRasterBand(i + 1).YSize if gSzX != nSzX or gSzY != nSzY: - print( + my_print( "Band size mismatch (band=%d golden=[%d,%d], new=[%d,%d])" % (i, gSzX, gSzY, nSzX, nSzY) ) @@ -283,7 +401,7 @@ def compare_sds(golden_db, new_db, options=None): sds_diff = compare_db(sub_golden_db, sub_new_db, options) found_diff += sds_diff if sds_diff > 0: - print( + my_print( "%d differences found between:\n %s\n %s" % (sds_diff, golden_sds[key], new_sds[key]) ) @@ -295,28 +413,72 @@ def 
compare_sds(golden_db, new_db, options=None):
 
 
 def find_diff(
-    golden_file: PathLikeOrStr, new_file: PathLikeOrStr, check_sds: bool = False
+    golden_file: PathLikeOrStr,
+    new_file: PathLikeOrStr,
+    check_sds: bool = False,
+    options=None,
 ):
     # Compare Files
     found_diff = 0
 
-    # compare raw binary files.
-    try:
-        os.stat(golden_file)
+    options = [] if options is None else options
 
-        if not filecmp.cmp(golden_file, new_file):
-            print("Files differ at the binary level.")
-            found_diff += 1
-    except OSError:
-        print("Skipped binary file comparison, golden file not in filesystem.")
+    if "SKIP_BINARY" not in options:
+        # compare raw binary files.
+        try:
+            os.stat(golden_file)
+            os.stat(new_file)
+
+            if not filecmp.cmp(golden_file, new_file):
+                my_print("Files differ at the binary level.")
+                found_diff += 1
+        except OSError:
+            stat_golden = gdal.VSIStatL(str(golden_file))
+            stat_new = gdal.VSIStatL(str(new_file))
+            if stat_golden and stat_new:
+                if stat_golden.size != stat_new.size:
+                    my_print("Files differ at the binary level.")
+                    found_diff += 1
+                else:
+                    f_golden = gdal.VSIFOpenL(str(golden_file), "rb")
+                    f_new = gdal.VSIFOpenL(str(new_file), "rb")
+                    if f_golden and f_new:
+                        off = 0
+                        while off < stat_golden.size:
+                            to_read = min(stat_golden.size - off, 1024 * 1024)
+                            golden_chunk = gdal.VSIFReadL(1, to_read, f_golden)
+                            if len(golden_chunk) < to_read:
+                                my_print(
+                                    "Binary file comparison failed: not enough bytes read in golden file"
+                                )
+                                break
+                            new_chunk = gdal.VSIFReadL(1, to_read, f_new)
+                            if golden_chunk != new_chunk:
+                                my_print("Files differ at the binary level.")
+                                found_diff += 1
+                                break
+                            off += to_read
+                    if f_golden:
+                        gdal.VSIFCloseL(f_golden)
+                    if f_new:
+                        gdal.VSIFCloseL(f_new)
+            else:
+                if not stat_golden:
+                    my_print(
+                        "Skipped binary file comparison, golden file not in filesystem."
+                    )
+                elif not stat_new:
+                    my_print(
+                        "Skipped binary file comparison, new file not in filesystem."
+                    )
 
     # compare as GDAL Datasets.
golden_db = gdal.Open(golden_file) new_db = gdal.Open(new_file) - found_diff += compare_db(golden_db, new_db) + found_diff += compare_db(golden_db, new_db, options) if check_sds: - found_diff += compare_sds(golden_db, new_db) + found_diff += compare_sds(golden_db, new_db, options) return found_diff @@ -326,6 +488,9 @@ def find_diff( def Usage(): print("Usage: gdalcompare.py [--help] [--help-general]") + print(" [-dumpdiffs] [-skip_binary] [-skip_overviews]") + print(" [-skip_geolocation] [-skip_geotransform]") + print(" [-skip_metadata] [-skip_rpc] [-skip_srs]") print(" [-sds] ") return 2 @@ -349,6 +514,7 @@ def main(argv=sys.argv): golden_file = None new_file = None check_sds = 0 + options = [] i = 1 while i < len(argv): @@ -359,6 +525,30 @@ def main(argv=sys.argv): elif argv[i] == "-sds": check_sds = 1 + elif argv[i] == "-dumpdiffs": + options.append("DUMP_DIFFS") + + elif argv[i] == "-skip_binary": + options.append("SKIP_BINARY") + + elif argv[i] == "-skip_overviews": + options.append("SKIP_OVERVIEWS") + + elif argv[i] == "-skip_geolocation": + options.append("SKIP_GEOLOCATION") + + elif argv[i] == "-skip_geotransform": + options.append("SKIP_GEOTRANSFORM") + + elif argv[i] == "-skip_metadata": + options.append("SKIP_METADATA") + + elif argv[i] == "-skip_rpc": + options.append("SKIP_RPC") + + elif argv[i] == "-skip_srs": + options.append("SKIP_SRS") + elif golden_file is None: golden_file = argv[i] @@ -366,13 +556,13 @@ def main(argv=sys.argv): new_file = argv[i] else: - print("Unrecognised argument: " + argv[i]) + my_print("Unrecognised argument: " + argv[i]) return Usage() i = i + 1 # next argument - found_diff = find_diff(golden_file, new_file, check_sds) + found_diff = find_diff(golden_file, new_file, check_sds, options) print("Differences Found: " + str(found_diff)) sys.exit(found_diff)