From ac75d4340f2368683ad7b48ece4a8369cb055e2b Mon Sep 17 00:00:00 2001
From: Even Rouault <even.rouault@spatialys.com>
Date: Wed, 27 Sep 2023 13:09:16 +0200
Subject: [PATCH] gdalcompare.py: multiple enhancements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Multiple enhancements upstreamed from Planet's version:
  * RPC metadata comparison (can be disabled with SKIP_RPC option)
  * GEOLOCATION metadata comparison (disabled with SKIP_GEOLOCATION)
  * Fix comparing NaN nodata values
  * Dump differences of pixel content (enabled with DUMP_DIFFS option)
  * Improve comparison of float32/float64 data by not just checking
    checksum but also statistics
  * Overview comparison can be skipped with SKIP_OVERVIEWS
  * Add sanity check when comparing pixel-interleave datasets with
    more than 10 bands since that can be slow.

- Binary comparison can be done on /vsi files now

- Command line utility: add ``-dumpdiffs``, ``-skip_binary``,
  ``-skip_overviews``, ``-skip_geolocation``, ``-skip_geotransform``,
  ``-skip_metadata``, ``-skip_rpc``, and ``-skip_srs`` options

- Add a test_gdalcompare.py script
---
 autotest/pyscripts/test_gdalcompare.py        | 303 ++++++++++++++
 doc/source/programs/gdalcompare.rst           |  56 ++-
 .../gdal-utils/osgeo_utils/gdalcompare.py     | 388 +++++++++++++-----
 3 files changed, 647 insertions(+), 100 deletions(-)
 create mode 100644 autotest/pyscripts/test_gdalcompare.py
diff --git a/autotest/pyscripts/test_gdalcompare.py b/autotest/pyscripts/test_gdalcompare.py
new file mode 100644
index 000000000000..342b4d1be796
--- /dev/null
+++ b/autotest/pyscripts/test_gdalcompare.py
@@ -0,0 +1,303 @@
+#!/usr/bin/env pytest
+# -*- coding: utf-8 -*-
+###############################################################################
+# $Id$
+#
+# Project:  GDAL/OGR Test Suite
+# Purpose:  gdalcompare.py testing
+# Author:   Even Rouault <even dot rouault @ spatialys.com>
+#
+###############################################################################
+# Copyright (c) 2023, Even Rouault <even dot rouault at spatialys.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+###############################################################################
+
+import shutil
+
+import pytest
+import test_py_scripts
+
+from osgeo import gdal
+from osgeo_utils import gdalcompare
+
+pytestmark = pytest.mark.skipif(
+    test_py_scripts.get_py_script("gdalcompare") is None,
+    reason="gdalcompare not available",
+)
+
+
+@pytest.fixture()
+def script_path():
+    return test_py_scripts.get_py_script("gdalcompare")  # fed to run_py_script()
+
+
+@pytest.fixture()
+def captured_print():
+    def noop_print(*args, **kwargs):
+        pass
+
+    ori_print = gdalcompare.my_print
+    gdalcompare.my_print = noop_print  # silence gdalcompare's output for the test
+    try:
+        yield
+    finally:
+        gdalcompare.my_print = ori_print  # always restore the original printer
+
+
+@pytest.fixture()
+def source_filename(tmp_vsimem):
+    with open("../gcore/data/byte.tif", "rb") as f:  # close the handle promptly
+        gdal.FileFromMemBuffer(str(tmp_vsimem / "src.tif"), f.read())
+    return str(tmp_vsimem / "src.tif")  # copy of byte.tif under tmp_vsimem
+
+
+###############################################################################
+
+
+def test_gdalcompare_same(script_path, tmp_path):
+    """A file compared against itself must report zero differences."""
+    source_filename = str(tmp_path / "src.tif")
+    shutil.copy("../gcore/data/byte.tif", source_filename)
+    ret = test_py_scripts.run_py_script(
+        script_path, "gdalcompare", f"{source_filename} {source_filename}"
+    )
+    assert "Differences Found: 0" in ret
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_type(script_path, tmp_path):
+    """byte.tif versus uint16.tif must be reported as a pixel type difference."""
+    source_filename = str(tmp_path / "src.tif")
+    shutil.copy("../gcore/data/byte.tif", source_filename)
+    ret = test_py_scripts.run_py_script(
+        script_path, "gdalcompare", f"{source_filename} ../gcore/data/uint16.tif"
+    )
+    assert "Differences Found: 0" not in ret
+    assert "Band 1 pixel types differ" in ret
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_pixel_content(
+    tmp_vsimem, captured_print, source_filename
+):
+    """Changed pixel values are one difference; DUMP_DIFFS writes them to a file."""
+    golden_filename = source_filename
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(filename, golden_filename, options="-scale 0 1 0 0")
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+
+    prefix = str(tmp_vsimem / "")
+    gdalcompare.find_diff(
+        golden_filename,
+        filename,
+        options=["SKIP_BINARY", "DUMP_DIFFS", "DUMP_DIFFS_PREFIX=" + prefix],
+    )
+    ds = gdal.Open(prefix + "1.tif")
+    assert ds.GetRasterBand(1).Checksum() == 4672
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_band_count(tmp_vsimem, captured_print, source_filename):
+    """A band count mismatch is one difference, whichever file is the golden one."""
+    golden_filename = source_filename
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(filename, golden_filename, options="-b 1 -b 1")
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 1
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_dimension(tmp_vsimem, captured_print, source_filename):
+    """A raster cropped by one row is reported as exactly one difference."""
+    golden_filename = source_filename
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(filename, golden_filename, options="-srcwin 0 0 20 19")
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_nodata(tmp_vsimem, captured_print, source_filename):
+    """Assigning a nodata value is reported in both comparison directions."""
+    golden_filename = "../gcore/data/byte.tif"
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(filename, golden_filename, options="-a_nodata 1")
+    # 2 differences expected: the nodata value itself plus the mask flags
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 2
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 2
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_nodata_nan(tmp_vsimem, captured_print, source_filename):
+    """nan nodata equals itself and differs from 5 in both comparison directions."""
+    golden_filename = str(tmp_vsimem / "golden.tif")
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(
+        golden_filename, source_filename, options="-ot Float32 -a_nodata nan"
+    )
+    gdal.Translate(filename, source_filename, options="-ot Float32 -a_nodata 5")
+    assert (
+        gdalcompare.find_diff(golden_filename, golden_filename, options=["SKIP_BINARY"])
+        == 0
+    )
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 1
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_srs(tmp_vsimem, captured_print, source_filename):
+    """A changed or cleared SRS is detected; SKIP_SRS hides a changed one."""
+    golden_filename = source_filename
+    filename = str(tmp_vsimem / "new.tif")
+
+    gdal.Translate(filename, golden_filename, options="-a_srs EPSG:4326")
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(
+            golden_filename, filename, options=["SKIP_SRS", "SKIP_BINARY"]
+        )
+        == 0
+    )
+
+    ds = gdal.Translate(filename, golden_filename)
+    ds.SetSpatialRef(None)
+    ds = None
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) != 0
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) != 0
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_geotransform(
+    tmp_vsimem, captured_print, source_filename
+):
+    """A changed geotransform is detected; SKIP_GEOTRANSFORM hides the change."""
+    golden_filename = source_filename
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(filename, golden_filename, options="-a_ullr 0 1 1 0")
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(
+            golden_filename, filename, options=["SKIP_GEOTRANSFORM", "SKIP_BINARY"]
+        )
+        == 0
+    )
+
+    ds = gdal.Translate(filename, golden_filename)
+    ds.SetGeoTransform([0, 0, 0, 0, 0, 0])
+    ds = None
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) != 0
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) != 0
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_metadata(tmp_vsimem, captured_print, source_filename):
+    """An extra key is 1 difference one way, 2 the other, 0 with SKIP_METADATA."""
+    golden_filename = source_filename
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(filename, golden_filename, options="-mo FOO=BAR")
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 2
+    )
+    assert (
+        gdalcompare.find_diff(
+            golden_filename, filename, options=["SKIP_METADATA", "SKIP_BINARY"]
+        )
+        == 0
+    )
+
+
+###############################################################################
+
+
+def test_gdalcompare_different_overview(tmp_vsimem, captured_print, source_filename):
+    """Missing or differently resampled overviews count as one difference."""
+    golden_filename = str(tmp_vsimem / "golden.tif")
+    gdal.FileFromMemBuffer(golden_filename, open("../gcore/data/byte.tif", "rb").read())
+    ds = gdal.Open(golden_filename, gdal.GA_Update)
+    ds.BuildOverviews("NEAR", [2])
+    ds = None
+    assert (
+        gdalcompare.find_diff(golden_filename, golden_filename, options=["SKIP_BINARY"])
+        == 0
+    )
+
+    filename = str(tmp_vsimem / "new.tif")
+    gdal.Translate(filename, source_filename)
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
+    assert (
+        gdalcompare.find_diff(filename, golden_filename, options=["SKIP_BINARY"]) == 1
+    )
+
+    ds = gdal.Translate(filename, source_filename)
+    ds.BuildOverviews("AVERAGE", [2])
+    ds = None
+    assert (
+        gdalcompare.find_diff(golden_filename, filename, options=["SKIP_BINARY"]) == 1
+    )
diff --git a/doc/source/programs/gdalcompare.rst b/doc/source/programs/gdalcompare.rst
index 6037e1c3bb75..993d52668427 100644
--- a/doc/source/programs/gdalcompare.rst
+++ b/doc/source/programs/gdalcompare.rst
@@ -15,7 +15,12 @@ Synopsis
 
 .. code-block::
 
-    gdalcompare.py [--help] [--help-general] [-sds] golden_file new_file
+    gdalcompare.py [--help] [--help-general]
+                   [-dumpdiffs] [-skip_binary] [-skip_overviews]
+                   [-skip_geolocation] [-skip_geotransform]
+                   [-skip_metadata] [-skip_rpc] [-skip_srs]
+                   [-sds] <golden_file> <new_file>
+
 
 Description
 -----------
@@ -34,6 +39,55 @@ count of 1 (the binary difference) should be considered acceptable.
 
 .. include:: options/help_and_help_general.rst
 
+.. option:: -dumpdiffs
+
+    .. versionadded:: 3.8
+
+    Whether to write the differences in pixel content to a TIFF file in the
+    current directory.
+
+.. option:: -skip_binary
+
+    .. versionadded:: 3.8
+
+    Whether to skip exact comparison of binary content.
+
+.. option:: -skip_overviews
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of overviews.
+
+.. option:: -skip_geolocation
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of GEOLOCATION metadata domain.
+
+.. option:: -skip_geotransform
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of geotransform matrix.
+
+.. option:: -skip_metadata
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of metadata.
+
+.. option:: -skip_rpc
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of Rational Polynomial Coefficients (RPC) metadata domain.
+
+.. option:: -skip_srs
+
+    .. versionadded:: 3.8
+
+    Whether to skip comparison of spatial reference systems (SRS).
+
 .. option:: -sds
 
     If this flag is passed the script will compare all subdatasets that
diff --git a/swig/python/gdal-utils/osgeo_utils/gdalcompare.py b/swig/python/gdal-utils/osgeo_utils/gdalcompare.py
index 8998427be2bb..e9dd6f8e206f 100644
--- a/swig/python/gdal-utils/osgeo_utils/gdalcompare.py
+++ b/swig/python/gdal-utils/osgeo_utils/gdalcompare.py
@@ -29,6 +29,7 @@
 #  DEALINGS IN THE SOFTWARE.
 # ******************************************************************************
 
+import array
 import filecmp
 import math
 import os
@@ -39,29 +40,50 @@
 #######################################################
 from osgeo_utils.auxiliary.base import PathLikeOrStr
 
+my_print = print
 
-def compare_metadata(golden_md, new_md, ident, options=None):
-    # pylint: disable=unused-argument
+
+def compare_metadata(golden_md, new_md, md_id, options=None):
+    """Compare two metadata dictionaries and return the number of differences.
+
+    md_id names the domain in messages; "RPC" values are compared after
+    strip(). The "backend", "ERR_BIAS" and "ERR_RAND" keys and the value of
+    "NITF_FDT" are ignored. Either dictionary may be None (treated as empty).
+    """
 
     if golden_md is None and new_md is None:
         return 0
 
     found_diff = 0
 
-    if len(list(golden_md.keys())) != len(list(new_md.keys())):
-        print("Difference in %s metadata key count" % ident)
-        print("  Golden Keys: " + str(list(golden_md.keys())))
-        print("  New Keys: " + str(list(new_md.keys())))
+    dont_care_keys = ("backend", "ERR_BIAS", "ERR_RAND")
+    golden_keys = [k for k in (golden_md or {}) if k not in dont_care_keys]
+    new_keys = [k for k in (new_md or {}) if k not in dont_care_keys]
+
+    if len(golden_keys) != len(new_keys):
+        my_print("Difference in %s metadata key count" % md_id)
+        my_print("  Golden Keys: " + str(golden_keys))
+        my_print("  New Keys: " + str(new_keys))
         found_diff += 1
 
-    for key in list(golden_md.keys()):
-        if key not in new_md:
-            print('New %s metadata lacks key "%s"' % (ident, key))
+    for key in golden_keys:
+        if key not in new_keys:
+            my_print('New %s metadata lacks key "%s"' % (md_id, key))
+            found_diff += 1
+        elif md_id == "RPC" and new_md[key].strip() != golden_md[key].strip():
+            # The strip above is because _RPC.TXT files and in-file have a difference
+            # in white space that is not otherwise meaningful.
+            my_print('RPC Metadata value difference for key "' + key + '"')
+            my_print('  Golden: "' + golden_md[key] + '"')
+            my_print('  New:    "' + new_md[key] + '"')
             found_diff += 1
-        elif new_md[key] != golden_md[key]:
-            print('Metadata value difference for key "' + key + '"')
-            print('  Golden: "' + golden_md[key] + '"')
-            print('  New:    "' + new_md[key] + '"')
+        elif md_id != "RPC" and new_md[key] != golden_md[key]:
+            if key == "NITF_FDT":
+                # this will always have the current date set
+                continue
+            my_print('Metadata value difference for key "' + key + '"')
+            my_print('  Golden: "' + golden_md[key] + '"')
+            my_print('  New:    "' + new_md[key] + '"')
             found_diff += 1
 
     return found_diff
@@ -69,107 +91,174 @@ def compare_metadata(golden_md, new_md, ident, options=None):
 
 #######################################################
 # Review and report on the actual image pixels that differ.
-def compare_image_pixels(golden_band, new_band, ident, options=None):
-    # pylint: disable=unused-argument
+def compare_image_pixels(golden_band, new_band, id, options=None):
+    """Print how many pixels differ between two bands and the largest difference.
+
+    If "DUMP_DIFFS" is in options, golden - new is also written to a GeoTIFF."""
 
     diff_count = 0
     max_diff = 0
 
+    out_db = None
+    if options and "DUMP_DIFFS" in options:
+        key = "DUMP_DIFFS_PREFIX="
+        prefix = next((o[len(key) :] for o in options if o.startswith(key)), "")
+        diff_fn = prefix + id.replace(" ", "_") + ".tif"
+        out_db = gdal.GetDriverByName("GTiff").Create(
+            diff_fn, golden_band.XSize, golden_band.YSize, 1, gdal.GDT_Float32
+        )
+
+    xsize = golden_band.XSize
     for line in range(golden_band.YSize):
-        golden_line = golden_band.ReadAsArray(0, line, golden_band.XSize, 1)[0]
-        new_line = new_band.ReadAsArray(0, line, golden_band.XSize, 1)[0]
-        diff_line = golden_line.astype(float) - new_line.astype(float)
-        max_diff = max(max_diff, abs(diff_line).max())
-        diff_count += len(diff_line.nonzero()[0])
+        golden_line = array.array(
+            "d", golden_band.ReadRaster(0, line, xsize, 1, buf_type=gdal.GDT_Float64)
+        )
+        new_line = array.array(
+            "d", new_band.ReadRaster(0, line, xsize, 1, buf_type=gdal.GDT_Float64)
+        )
+        diff_line = [golden_line[i] - new_line[i] for i in range(xsize)]
+        max_diff_this_line = max([abs(x) for x in diff_line])
+        max_diff = max(max_diff, max_diff_this_line)
+        if max_diff_this_line:
+            diff_count += sum([(1 if x else 0) for x in diff_line])
+        if out_db is not None:
+            out_db.GetRasterBand(1).WriteRaster(
+                0,
+                line,
+                xsize,
+                1,
+                array.array("d", diff_line).tobytes(),
+                buf_type=gdal.GDT_Float64,
+            )
 
-    print("  Pixels Differing: " + str(diff_count))
-    print("  Maximum Pixel Difference: " + str(max_diff))
+    my_print("  Pixels Differing: " + str(diff_count))
+    my_print("  Maximum Pixel Difference: " + str(max_diff))
+    if out_db is not None:
+        my_print("  Wrote Diffs to: %s" % diff_fn)
 
 
 #######################################################
 
 
-def compare_band(golden_band, new_band, ident, options=None):
+def compare_band(golden_band, new_band, id, options=None):
+    """Compare two raster bands and return the number of differences found.
+
+    Size, data type, nodata, color interpretation, pixel content, overviews,
+    mask and metadata are checked; ``options`` lists the SKIP_*/CHECK_* flags."""
     found_diff = 0
 
     options = [] if options is None else options
 
+    if golden_band.XSize != new_band.XSize or golden_band.YSize != new_band.YSize:
+        my_print(
+            "Band size mismatch (band=%s golden=[%d,%d], new=[%d,%d])"
+            % (id, golden_band.XSize, golden_band.YSize, new_band.XSize, new_band.YSize)
+        )
+        found_diff += 1
+
     if golden_band.DataType != new_band.DataType:
-        print("Band %s pixel types differ." % ident)
-        print("  Golden: " + gdal.GetDataTypeName(golden_band.DataType))
-        print("  New:    " + gdal.GetDataTypeName(new_band.DataType))
+        my_print("Band %s pixel types differ." % id)
+        my_print("  Golden: " + gdal.GetDataTypeName(golden_band.DataType))
+        my_print("  New:    " + gdal.GetDataTypeName(new_band.DataType))
         found_diff += 1
 
-    golden_band_nodata = golden_band.GetNoDataValue()
-    new_band_nodata = new_band.GetNoDataValue()
-    if golden_band_nodata != new_band_nodata:
-        if golden_band_nodata and new_band_nodata:
-            if not (math.isnan(golden_band_nodata) and math.isnan(new_band_nodata)):
-                print("Band %s nodata values differ." % ident)
-                print("  Golden: " + str(golden_band_nodata))
-                print("  New:    " + str(new_band_nodata))
-                found_diff += 1
-        else:
-            print("Band %s nodata values differ." % ident)
-            print("  Golden: " + str(golden_band_nodata))
-            print("  New:    " + str(new_band_nodata))
-            found_diff += 1
+    golden_nodata = golden_band.GetNoDataValue()
+    new_nodata = new_band.GetNoDataValue()
+
+    # Two 'nan' values are _never_ equal, but bands that both use 'nan' as
+    # nodata value do in fact use the same nodata value, hence the explicit
+    # isnan() test. 'None' has to be ruled out first since math.isnan(None)
+    # raises. Infinities need no special case: inf == inf is True, and inf
+    # versus -inf is a real difference that must be reported.
+    if None not in (golden_nodata, new_nodata) and (
+        math.isnan(golden_nodata) and math.isnan(new_nodata)
+    ):
+        pass
+    elif golden_nodata != new_nodata:
+        my_print("Band %s nodata values differ." % id)
+        my_print("  Golden: " + str(golden_nodata))
+        my_print("  New:    " + str(new_nodata))
+        found_diff += 1
 
     if golden_band.GetColorInterpretation() != new_band.GetColorInterpretation():
-        print("Band %s color interpretation values differ." % ident)
-        print(
+        my_print("Band %s color interpretation values differ." % id)
+        my_print(
             "  Golden: "
             + gdal.GetColorInterpretationName(golden_band.GetColorInterpretation())
         )
-        print(
+        my_print(
             "  New:    "
             + gdal.GetColorInterpretationName(new_band.GetColorInterpretation())
         )
         found_diff += 1
 
-    if golden_band.Checksum() != new_band.Checksum():
-        print("Band %s checksum difference:" % ident)
-        print("  Golden: " + str(golden_band.Checksum()))
-        print("  New:    " + str(new_band.Checksum()))
-        found_diff += 1
-        compare_image_pixels(golden_band, new_band, ident, options)
-
-    # Check overviews
-    if golden_band.GetOverviewCount() != new_band.GetOverviewCount():
-        print("Band %s overview count difference:" % ident)
-        print("  Golden: " + str(golden_band.GetOverviewCount()))
-        print("  New:    " + str(new_band.GetOverviewCount()))
+    golden_band_checksum = golden_band.Checksum()
+    new_band_checksum = new_band.Checksum()
+    if golden_band_checksum != new_band_checksum:
+        my_print("Band %s checksum difference:" % id)
+        my_print("  Golden: " + str(golden_band_checksum))
+        my_print("  New:    " + str(new_band_checksum))
+        if found_diff == 0:
+            compare_image_pixels(golden_band, new_band, id, options)
         found_diff += 1
     else:
-        for i in range(golden_band.GetOverviewCount()):
-            found_diff += compare_band(
-                golden_band.GetOverview(i),
-                new_band.GetOverview(i),
-                ident + " overview " + str(i),
-                options,
-            )
+        # check a bit deeper in case of Float data type for which the Checksum() function is not reliable
+        if golden_band.DataType in (gdal.GDT_Float32, gdal.GDT_Float64):
+            if golden_band.ComputeRasterMinMax() != new_band.ComputeRasterMinMax():
+                my_print("Band %s statistics difference:" % id)
+                my_print("  Golden: " + str(golden_band.ComputeBandStats()))
+                my_print("  New:    " + str(new_band.ComputeBandStats()))
+                found_diff += 1
+                compare_image_pixels(golden_band, new_band, id, {})
+
-    # Metadata
-    if "SKIP_METADATA" not in options:
-        found_diff += compare_metadata(
-            golden_band.GetMetadata(), new_band.GetMetadata(), "Band " + ident, options
-        )
+    # Check overviews
+    if "SKIP_OVERVIEWS" not in options:
+        if golden_band.GetOverviewCount() != new_band.GetOverviewCount():
+            my_print("Band %s overview count difference:" % id)
+            my_print("  Golden: " + str(golden_band.GetOverviewCount()))
+            my_print("  New:    " + str(new_band.GetOverviewCount()))
+            found_diff += 1
+        else:
+            for i in range(golden_band.GetOverviewCount()):
+                found_diff += compare_band(
+                    golden_band.GetOverview(i),
+                    new_band.GetOverview(i),
+                    id + " overview " + str(i),
+                    options,
+                )
 
     # Mask band
     if golden_band.GetMaskFlags() != new_band.GetMaskFlags():
-        print("Band %s mask flags difference:" % ident)
-        print("  Golden: " + str(golden_band.GetMaskFlags()))
-        print("  New:    " + str(new_band.GetMaskFlags()))
+        my_print("Band %s mask flags difference:" % id)
+        my_print("  Golden: " + str(golden_band.GetMaskFlags()))
+        my_print("  New:    " + str(new_band.GetMaskFlags()))
         found_diff += 1
-    elif golden_band.GetMaskFlags() in (gdal.GMF_PER_DATASET, 0):
+    elif golden_band.GetMaskFlags() == gdal.GMF_PER_DATASET:
+        # Check mask band if it's GMF_PER_DATASET
         found_diff += compare_band(
             golden_band.GetMaskBand(),
             new_band.GetMaskBand(),
-            ident + " mask band",
+            id + " mask band",
             options,
         )
 
-    # TODO: Color Table, gain/bias, units, blocksize, min/max
+    # Metadata
+    if "SKIP_METADATA" not in options:
+        found_diff += compare_metadata(
+            golden_band.GetMetadata(), new_band.GetMetadata(), "Band " + id, options
+        )
+
+    # Band Description - currently this is opt in since we have not
+    # been tracking this in the past.  It would be nice to make it the
+    # default at some point.
+    if "CHECK_BAND_DESC" in options:
+        if golden_band.GetDescription() != new_band.GetDescription():
+            my_print("Band %s descriptions difference:" % id)
+            my_print("  Golden: " + str(golden_band.GetDescription()))
+            my_print("  New:    " + str(new_band.GetDescription()))
+            found_diff += 1
+
+    # TODO: Color Table, gain/bias, units, blocksize, mask, min/max
 
     return found_diff
 
@@ -181,20 +270,20 @@ def compare_srs(golden_wkt, new_wkt):
     if golden_wkt == new_wkt:
         return 0
 
-    print("Difference in SRS!")
+    my_print("Difference in SRS!")
 
     golden_srs = osr.SpatialReference(golden_wkt)
     new_srs = osr.SpatialReference(new_wkt)
 
     if golden_srs.IsSame(new_srs):
-        print("  * IsSame() reports them as equivalent.")
+        my_print("  * IsSame() reports them as equivalent.")
     else:
-        print("  * IsSame() reports them as different.")
+        my_print("  * IsSame() reports them as different.")
 
-    print("  Golden:")
-    print("  " + golden_srs.ExportToPrettyWkt())
-    print("  New:")
-    print("  " + new_srs.ExportToPrettyWkt())
+    my_print("  Golden:")
+    my_print("  " + (golden_srs.ExportToPrettyWkt() if golden_wkt else "None"))
+    my_print("  New:")
+    my_print("  " + (new_srs.ExportToPrettyWkt() if new_wkt else "None"))
 
     return 1
 
@@ -207,6 +296,21 @@ def compare_db(golden_db, new_db, options=None):
 
     options = [] if options is None else options
 
+    # Comparisons are done per-band, so an image with 'INTERLEAVE=PIXEL' and a
+    # lot of bands will take hours to complete.
+    if "SKIP_INTERLEAVE_CHECK" not in options:
+        maxbands = 10
+        interleave = golden_db.GetMetadata("IMAGE_STRUCTURE").get("INTERLEAVE", "")
+        if golden_db.RasterCount > maxbands and interleave.lower() == "pixel":
+            raise ValueError(
+                f"Golden file has more than {maxbands} and INTERLEAVE={interleave} - this"
+                f" check will eventually succeed but will take hours due to the"
+                f" amount of I/O required for per-band comparisons. Recommend"
+                f" testing image encoding directly in your test, and then"
+                f" translating to a band interleaved format before calling this"
+                f" method: {golden_db.GetDescription()}"
+            )
+
     # SRS
     if "SKIP_SRS" not in options:
         found_diff += compare_srs(golden_db.GetProjection(), new_db.GetProjection())
@@ -216,9 +320,9 @@ def compare_db(golden_db, new_db, options=None):
         golden_gt = golden_db.GetGeoTransform()
         new_gt = new_db.GetGeoTransform()
         if golden_gt != new_gt:
-            print("GeoTransforms Differ:")
-            print("  Golden: " + str(golden_gt))
-            print("  New:    " + str(new_gt))
+            my_print("GeoTransforms Differ:")
+            my_print("  Golden: " + str(golden_gt))
+            my_print("  New:    " + str(new_gt))
             found_diff += 1
 
     # Metadata
@@ -227,13 +331,27 @@ def compare_db(golden_db, new_db, options=None):
             golden_db.GetMetadata(), new_db.GetMetadata(), "Dataset", options
         )
 
+    if "SKIP_RPC" not in options:
+        found_diff += compare_metadata(
+            golden_db.GetMetadata("RPC"), new_db.GetMetadata("RPC"), "RPC", options
+        )
+
+    if "SKIP_GEOLOCATION" not in options:
+        found_diff += compare_metadata(
+            golden_db.GetMetadata("GEOLOCATION"),
+            new_db.GetMetadata("GEOLOCATION"),
+            "GEOLOCATION",
+            options,
+        )
+
     # Bands
     if golden_db.RasterCount != new_db.RasterCount:
-        print(
+        my_print(
             "Band count mismatch (golden=%d, new=%d)"
             % (golden_db.RasterCount, new_db.RasterCount)
         )
         found_diff += 1
+        return found_diff
 
     # Dimensions
     for i in range(golden_db.RasterCount):
@@ -243,7 +361,7 @@ def compare_db(golden_db, new_db, options=None):
         nSzY = new_db.GetRasterBand(i + 1).YSize
 
         if gSzX != nSzX or gSzY != nSzY:
-            print(
+            my_print(
                 "Band size mismatch (band=%d golden=[%d,%d], new=[%d,%d])"
                 % (i, gSzX, gSzY, nSzX, nSzY)
             )
@@ -283,7 +401,7 @@ def compare_sds(golden_db, new_db, options=None):
         sds_diff = compare_db(sub_golden_db, sub_new_db, options)
         found_diff += sds_diff
         if sds_diff > 0:
-            print(
+            my_print(
                 "%d differences found between:\n  %s\n  %s"
                 % (sds_diff, golden_sds[key], new_sds[key])
             )
@@ -295,28 +413,72 @@ def compare_sds(golden_db, new_db, options=None):
 
 
 def find_diff(
-    golden_file: PathLikeOrStr, new_file: PathLikeOrStr, check_sds: bool = False
+    golden_file: PathLikeOrStr,
+    new_file: PathLikeOrStr,
+    check_sds: bool = False,
+    options=None,
 ):
     # Compare Files
     found_diff = 0
 
-    # compare raw binary files.
-    try:
-        os.stat(golden_file)
+    options = [] if options is None else options
 
-        if not filecmp.cmp(golden_file, new_file):
-            print("Files differ at the binary level.")
-            found_diff += 1
-    except OSError:
-        print("Skipped binary file comparison, golden file not in filesystem.")
+    if "SKIP_BINARY" not in options:
+        # compare raw binary files: local files first, GDAL VSI I/O as fallback.
+        try:
+            os.stat(golden_file)
+            os.stat(new_file)
+
+            if not filecmp.cmp(golden_file, new_file):
+                my_print("Files differ at the binary level.")
+                found_diff += 1
+        except OSError:  # os.stat() failed on one of the files: retry through VSI
+            stat_golden = gdal.VSIStatL(str(golden_file))
+            stat_new = gdal.VSIStatL(str(new_file))
+            if stat_golden and stat_new:
+                if stat_golden.size != stat_new.size:
+                    my_print("Files differ at the binary level.")
+                    found_diff += 1
+                else:
+                    f_golden = gdal.VSIFOpenL(str(golden_file), "rb")
+                    f_new = gdal.VSIFOpenL(str(new_file), "rb")
+                    if f_golden and f_new:
+                        off = 0
+                        while off < stat_golden.size:
+                            to_read = min(stat_golden.size - off, 1024 * 1024)
+                            golden_chunk = gdal.VSIFReadL(1, to_read, f_golden)
+                            if len(golden_chunk) < to_read:
+                                my_print(
+                                    "Binary file comparison failed: not enough bytes read in golden file"
+                                )
+                                break
+                            new_chunk = gdal.VSIFReadL(1, to_read, f_new)
+                            if golden_chunk != new_chunk:
+                                my_print("Files differ at the binary level.")
+                                found_diff += 1
+                                break
+                            off += to_read
+                    if f_golden:
+                        gdal.VSIFCloseL(f_golden)
+                    if f_new:
+                        gdal.VSIFCloseL(f_new)
+            else:
+                if not stat_golden:
+                    my_print(
+                        "Skipped binary file comparison, golden file not in filesystem."
+                    )
+                elif not stat_new:
+                    my_print(
+                        "Skipped binary file comparison, new file not in filesystem."
+                    )
+
     # compare as GDAL Datasets.
     golden_db = gdal.Open(golden_file)
     new_db = gdal.Open(new_file)
-    found_diff += compare_db(golden_db, new_db)
+    found_diff += compare_db(golden_db, new_db, options)
 
     if check_sds:
-        found_diff += compare_sds(golden_db, new_db)
+        found_diff += compare_sds(golden_db, new_db, options)
 
     return found_diff
 
@@ -326,6 +488,9 @@ def find_diff(
 
 def Usage():
     print("Usage: gdalcompare.py [--help] [--help-general]")
+    print("                      [-dumpdiffs] [-skip_binary] [-skip_overviews]")
+    print("                      [-skip_geolocation] [-skip_geotransform]")
+    print("                      [-skip_metadata] [-skip_rpc] [-skip_srs]")
     print("                      [-sds] <golden_file> <new_file>")
     return 2
 
@@ -349,6 +514,7 @@ def main(argv=sys.argv):
     golden_file = None
     new_file = None
     check_sds = 0
+    options = []
 
     i = 1
     while i < len(argv):
@@ -359,6 +525,30 @@ def main(argv=sys.argv):
         elif argv[i] == "-sds":
             check_sds = 1
 
+        elif argv[i] == "-dumpdiffs":
+            options.append("DUMP_DIFFS")
+
+        elif argv[i] == "-skip_binary":
+            options.append("SKIP_BINARY")
+
+        elif argv[i] == "-skip_overviews":
+            options.append("SKIP_OVERVIEWS")
+
+        elif argv[i] == "-skip_geolocation":
+            options.append("SKIP_GEOLOCATION")
+
+        elif argv[i] == "-skip_geotransform":
+            options.append("SKIP_GEOTRANSFORM")
+
+        elif argv[i] == "-skip_metadata":
+            options.append("SKIP_METADATA")
+
+        elif argv[i] == "-skip_rpc":
+            options.append("SKIP_RPC")
+
+        elif argv[i] == "-skip_srs":
+            options.append("SKIP_SRS")
+
         elif golden_file is None:
             golden_file = argv[i]
 
@@ -366,13 +556,13 @@ def main(argv=sys.argv):
             new_file = argv[i]
 
         else:
-            print("Unrecognised argument: " + argv[i])
+            my_print("Unrecognised argument: " + argv[i])
             return Usage()
 
         i = i + 1
         # next argument
 
-    found_diff = find_diff(golden_file, new_file, check_sds)
+    found_diff = find_diff(golden_file, new_file, check_sds, options)
     print("Differences Found: " + str(found_diff))
     sys.exit(found_diff)