diff --git a/debian/patches/series.zfs b/debian/patches/series.zfs index 67a08e21..25667b65 100644 --- a/debian/patches/series.zfs +++ b/debian/patches/series.zfs @@ -7,3 +7,6 @@ zfs/0006-dont-symlink-zed-scripts.patch zfs/0007-Use-installed-python3.patch zfs/0008-Add-systemd-unit-for-importing-specific-pools.patch zfs/0009-Patch-move-manpage-arcstat-1-to-arcstat-8.patch +zfs/0010-arcstat-Fix-integer-division-with-python3.patch +zfs/0011-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch +zfs/0012-Fix-zvol_open-lock-inversion.patch diff --git a/debian/patches/zfs/0010-arcstat-Fix-integer-division-with-python3.patch b/debian/patches/zfs/0010-arcstat-Fix-integer-division-with-python3.patch new file mode 100644 index 00000000..086347f8 --- /dev/null +++ b/debian/patches/zfs/0010-arcstat-Fix-integer-division-with-python3.patch @@ -0,0 +1,134 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Valmiky Arquissandas +Date: Fri, 8 Oct 2021 16:32:27 +0100 +Subject: [PATCH] arcstat: Fix integer division with python3 + +The arcstat script requests compatibility with python2 and python3, but +PEP 238 modified the / operator and results in erroneous output when +run under python3. + +This commit replaces instances of / with //, yielding the expected +result in both versions of Python. 
+ +Reviewed-by: Brian Behlendorf +Reviewed-by: John Kennedy +Reviewed-by: Ryan Moeller +Signed-off-by: Valmiky Arquissandas +Closes #12603 +(cherry picked from commit 2d02bba23d83ae8fede8d281edc255f01ccd28e9) +Signed-off-by: Thomas Lamprecht +--- + cmd/arcstat/arcstat.in | 66 +++++++++++++++++++++--------------------- + 1 file changed, 33 insertions(+), 33 deletions(-) + +diff --git a/cmd/arcstat/arcstat.in b/cmd/arcstat/arcstat.in +index 9e7c52a6c..cd9a803a2 100755 +--- a/cmd/arcstat/arcstat.in ++++ b/cmd/arcstat/arcstat.in +@@ -441,73 +441,73 @@ def calculate(): + + v = dict() + v["time"] = time.strftime("%H:%M:%S", time.localtime()) +- v["hits"] = d["hits"] / sint +- v["miss"] = d["misses"] / sint ++ v["hits"] = d["hits"] // sint ++ v["miss"] = d["misses"] // sint + v["read"] = v["hits"] + v["miss"] +- v["hit%"] = 100 * v["hits"] / v["read"] if v["read"] > 0 else 0 ++ v["hit%"] = 100 * v["hits"] // v["read"] if v["read"] > 0 else 0 + v["miss%"] = 100 - v["hit%"] if v["read"] > 0 else 0 + +- v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) / sint +- v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) / sint ++ v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) // sint ++ v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) // sint + + v["dread"] = v["dhit"] + v["dmis"] +- v["dh%"] = 100 * v["dhit"] / v["dread"] if v["dread"] > 0 else 0 ++ v["dh%"] = 100 * v["dhit"] // v["dread"] if v["dread"] > 0 else 0 + v["dm%"] = 100 - v["dh%"] if v["dread"] > 0 else 0 + +- v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) / sint ++ v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) // sint + v["pmis"] = (d["prefetch_data_misses"] + +- d["prefetch_metadata_misses"]) / sint ++ d["prefetch_metadata_misses"]) // sint + + v["pread"] = v["phit"] + v["pmis"] +- v["ph%"] = 100 * v["phit"] / v["pread"] if v["pread"] > 0 else 0 ++ v["ph%"] = 100 * v["phit"] // v["pread"] if v["pread"] > 0 else 
0 + v["pm%"] = 100 - v["ph%"] if v["pread"] > 0 else 0 + + v["mhit"] = (d["prefetch_metadata_hits"] + +- d["demand_metadata_hits"]) / sint ++ d["demand_metadata_hits"]) // sint + v["mmis"] = (d["prefetch_metadata_misses"] + +- d["demand_metadata_misses"]) / sint ++ d["demand_metadata_misses"]) // sint + + v["mread"] = v["mhit"] + v["mmis"] +- v["mh%"] = 100 * v["mhit"] / v["mread"] if v["mread"] > 0 else 0 ++ v["mh%"] = 100 * v["mhit"] // v["mread"] if v["mread"] > 0 else 0 + v["mm%"] = 100 - v["mh%"] if v["mread"] > 0 else 0 + + v["arcsz"] = cur["size"] + v["size"] = cur["size"] + v["c"] = cur["c"] +- v["mfu"] = d["mfu_hits"] / sint +- v["mru"] = d["mru_hits"] / sint +- v["mrug"] = d["mru_ghost_hits"] / sint +- v["mfug"] = d["mfu_ghost_hits"] / sint +- v["eskip"] = d["evict_skip"] / sint +- v["el2skip"] = d["evict_l2_skip"] / sint +- v["el2cach"] = d["evict_l2_cached"] / sint +- v["el2el"] = d["evict_l2_eligible"] / sint +- v["el2mfu"] = d["evict_l2_eligible_mfu"] / sint +- v["el2mru"] = d["evict_l2_eligible_mru"] / sint +- v["el2inel"] = d["evict_l2_ineligible"] / sint +- v["mtxmis"] = d["mutex_miss"] / sint ++ v["mfu"] = d["mfu_hits"] // sint ++ v["mru"] = d["mru_hits"] // sint ++ v["mrug"] = d["mru_ghost_hits"] // sint ++ v["mfug"] = d["mfu_ghost_hits"] // sint ++ v["eskip"] = d["evict_skip"] // sint ++ v["el2skip"] = d["evict_l2_skip"] // sint ++ v["el2cach"] = d["evict_l2_cached"] // sint ++ v["el2el"] = d["evict_l2_eligible"] // sint ++ v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint ++ v["el2mru"] = d["evict_l2_eligible_mru"] // sint ++ v["el2inel"] = d["evict_l2_ineligible"] // sint ++ v["mtxmis"] = d["mutex_miss"] // sint + + if l2exist: +- v["l2hits"] = d["l2_hits"] / sint +- v["l2miss"] = d["l2_misses"] / sint ++ v["l2hits"] = d["l2_hits"] // sint ++ v["l2miss"] = d["l2_misses"] // sint + v["l2read"] = v["l2hits"] + v["l2miss"] +- v["l2hit%"] = 100 * v["l2hits"] / v["l2read"] if v["l2read"] > 0 else 0 ++ v["l2hit%"] = 100 * v["l2hits"] // v["l2read"] 
if v["l2read"] > 0 else 0 + + v["l2miss%"] = 100 - v["l2hit%"] if v["l2read"] > 0 else 0 + v["l2asize"] = cur["l2_asize"] + v["l2size"] = cur["l2_size"] +- v["l2bytes"] = d["l2_read_bytes"] / sint ++ v["l2bytes"] = d["l2_read_bytes"] // sint + + v["l2pref"] = cur["l2_prefetch_asize"] + v["l2mfu"] = cur["l2_mfu_asize"] + v["l2mru"] = cur["l2_mru_asize"] + v["l2data"] = cur["l2_bufc_data_asize"] + v["l2meta"] = cur["l2_bufc_metadata_asize"] +- v["l2pref%"] = 100 * v["l2pref"] / v["l2asize"] +- v["l2mfu%"] = 100 * v["l2mfu"] / v["l2asize"] +- v["l2mru%"] = 100 * v["l2mru"] / v["l2asize"] +- v["l2data%"] = 100 * v["l2data"] / v["l2asize"] +- v["l2meta%"] = 100 * v["l2meta"] / v["l2asize"] ++ v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"] ++ v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"] ++ v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"] ++ v["l2data%"] = 100 * v["l2data"] // v["l2asize"] ++ v["l2meta%"] = 100 * v["l2meta"] // v["l2asize"] + + v["grow"] = 0 if cur["arc_no_grow"] else 1 + v["need"] = cur["arc_need_free"] diff --git a/debian/patches/zfs/0011-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch b/debian/patches/zfs/0011-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch new file mode 100644 index 00000000..8de5df9c --- /dev/null +++ b/debian/patches/zfs/0011-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch @@ -0,0 +1,112 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Thomas Lamprecht +Date: Wed, 10 Nov 2021 09:29:47 +0100 +Subject: [PATCH] arc stat/summary: guard access to l2arc MFU/MRU stats + +commit 085321621e79a75bea41c2b6511da6ebfbf2ba0a added printing MFU +and MRU stats for 2.1 user space tools, but those keys are not +available in the 2.0 module. That means it may break the arcstat and +arc_summary tools after upgrade to 2.1 (user space), before a reboot +to the new 2.1 ZFS kernel-module happened, due to python raising a +KeyError on the dict access then. 
+ +Move those two keys to a .get accessor with `0` as fallback, as it +is better to show possibly wrong data for the new stat keys than +to throw an exception. + +Signed-off-by: Thomas Lamprecht + +Also move l2_mfu_asize, l2_mru_asize, l2_prefetch_asize, +l2_bufc_data_asize and l2_bufc_metadata_asize to a .get accessor +(these are only present with a cache device in the pool). +Signed-off-by: Stoiko Ivanov +--- + cmd/arc_summary/arc_summary3 | 28 ++++++++++++++-------------- + cmd/arcstat/arcstat.in | 14 +++++++------- + 2 files changed, 21 insertions(+), 21 deletions(-) + +diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3 +index 7b28012ed..fe6a6d9e2 100755 +--- a/cmd/arc_summary/arc_summary3 ++++ b/cmd/arc_summary/arc_summary3 +@@ -617,13 +617,13 @@ def section_arc(kstats_dict): + prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached'])) + prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible'])) + prt_i2('L2 eligible MFU evictions:', +- f_perc(arc_stats['evict_l2_eligible_mfu'], ++ f_perc(arc_stats.get('evict_l2_eligible_mfu', 0), # 2.0 module compat + arc_stats['evict_l2_eligible']), +- f_bytes(arc_stats['evict_l2_eligible_mfu'])) ++ f_bytes(arc_stats.get('evict_l2_eligible_mfu', 0))) + prt_i2('L2 eligible MRU evictions:', +- f_perc(arc_stats['evict_l2_eligible_mru'], ++ f_perc(arc_stats.get('evict_l2_eligible_mru', 0), # 2.0 module compat + arc_stats['evict_l2_eligible']), +- f_bytes(arc_stats['evict_l2_eligible_mru'])) ++ f_bytes(arc_stats.get('evict_l2_eligible_mru', 0))) + prt_i1('L2 ineligible evictions:', + f_bytes(arc_stats['evict_l2_ineligible'])) + print() +@@ -765,20 +765,20 @@ def section_l2arc(kstats_dict): + f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']), + f_bytes(arc_stats['l2_hdr_size'])) + prt_i2('MFU allocated size:', +- f_perc(arc_stats['l2_mfu_asize'], arc_stats['l2_asize']), +- f_bytes(arc_stats['l2_mfu_asize'])) ++ f_perc(arc_stats.get('l2_mfu_asize', 0), arc_stats['l2_asize']), ++ 
f_bytes(arc_stats.get('l2_mfu_asize', 0))) # 2.0 module compat + prt_i2('MRU allocated size:', +- f_perc(arc_stats['l2_mru_asize'], arc_stats['l2_asize']), +- f_bytes(arc_stats['l2_mru_asize'])) ++ f_perc(arc_stats.get('l2_mru_asize', 0), arc_stats['l2_asize']), ++ f_bytes(arc_stats.get('l2_mru_asize', 0))) # 2.0 module compat + prt_i2('Prefetch allocated size:', +- f_perc(arc_stats['l2_prefetch_asize'], arc_stats['l2_asize']), +- f_bytes(arc_stats['l2_prefetch_asize'])) ++ f_perc(arc_stats.get('l2_prefetch_asize', 0), arc_stats['l2_asize']), ++ f_bytes(arc_stats.get('l2_prefetch_asize',0))) # 2.0 module compat + prt_i2('Data (buffer content) allocated size:', +- f_perc(arc_stats['l2_bufc_data_asize'], arc_stats['l2_asize']), +- f_bytes(arc_stats['l2_bufc_data_asize'])) ++ f_perc(arc_stats.get('l2_bufc_data_asize', 0), arc_stats['l2_asize']), ++ f_bytes(arc_stats.get('l2_bufc_data_asize', 0))) # 2.0 module compat + prt_i2('Metadata (buffer content) allocated size:', +- f_perc(arc_stats['l2_bufc_metadata_asize'], arc_stats['l2_asize']), +- f_bytes(arc_stats['l2_bufc_metadata_asize'])) ++ f_perc(arc_stats.get('l2_bufc_metadata_asize', 0), arc_stats['l2_asize']), ++ f_bytes(arc_stats.get('l2_bufc_metadata_asize', 0))) # 2.0 module compat + + print() + prt_1('L2ARC breakdown:', f_hits(l2_access_total)) +diff --git a/cmd/arcstat/arcstat.in b/cmd/arcstat/arcstat.in +index cd9a803a2..ea45dc602 100755 +--- a/cmd/arcstat/arcstat.in ++++ b/cmd/arcstat/arcstat.in +@@ -482,8 +482,8 @@ def calculate(): + v["el2skip"] = d["evict_l2_skip"] // sint + v["el2cach"] = d["evict_l2_cached"] // sint + v["el2el"] = d["evict_l2_eligible"] // sint +- v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint +- v["el2mru"] = d["evict_l2_eligible_mru"] // sint ++ v["el2mfu"] = d.get("evict_l2_eligible_mfu", 0) // sint ++ v["el2mru"] = d.get("evict_l2_eligible_mru", 0) // sint + v["el2inel"] = d["evict_l2_ineligible"] // sint + v["mtxmis"] = d["mutex_miss"] // sint + +@@ -498,11 +498,11 @@ def 
calculate(): + v["l2size"] = cur["l2_size"] + v["l2bytes"] = d["l2_read_bytes"] // sint + +- v["l2pref"] = cur["l2_prefetch_asize"] +- v["l2mfu"] = cur["l2_mfu_asize"] +- v["l2mru"] = cur["l2_mru_asize"] +- v["l2data"] = cur["l2_bufc_data_asize"] +- v["l2meta"] = cur["l2_bufc_metadata_asize"] ++ v["l2pref"] = cur.get("l2_prefetch_asize", 0) ++ v["l2mfu"] = cur.get("l2_mfu_asize", 0) ++ v["l2mru"] = cur.get("l2_mru_asize", 0) ++ v["l2data"] = cur.get("l2_bufc_data_asize", 0) ++ v["l2meta"] = cur.get("l2_bufc_metadata_asize", 0) + v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"] + v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"] + v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"] \ No newline at end of file diff --git a/debian/patches/zfs/0012-Fix-zvol_open-lock-inversion.patch b/debian/patches/zfs/0012-Fix-zvol_open-lock-inversion.patch new file mode 100644 index 00000000..eb74550f --- /dev/null +++ b/debian/patches/zfs/0012-Fix-zvol_open-lock-inversion.patch @@ -0,0 +1,212 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Brian Behlendorf +Date: Fri, 17 Dec 2021 09:52:13 -0800 +Subject: [PATCH] Fix zvol_open() lock inversion + +When restructuring the zvol_open() logic for the Linux 5.13 kernel +a lock inversion was accidentally introduced. In the updated code +the spa_namespace_lock is now taken before the zv_suspend_lock +allowing the following scenario to occur: + + down_read <=== waiting for zv_suspend_lock + zvol_open <=== holds spa_namespace_lock + __blkdev_get + blkdev_get_by_dev + blkdev_open + ... + + mutex_lock <== waiting for spa_namespace_lock + spa_open_common + spa_open + dsl_pool_hold + dmu_objset_hold_flags + dmu_objset_hold + dsl_prop_get + dsl_prop_get_integer + zvol_create_minor + dmu_recv_end + zfs_ioc_recv_impl <=== holds zv_suspend_lock via zvol_suspend() + zfs_ioc_recv + ... 
+ +This commit resolves the issue by moving the acquisition of the +spa_namespace_lock back to after the zv_suspend_lock which restores +the original ordering. + +Additionally, as part of this change the error exit paths were +simplified where possible. + +Reviewed-by: Tony Hutter +Reviewed-by: Rich Ercolani +Signed-off-by: Brian Behlendorf +Closes #12863 +(cherry picked from commit 8a02d01e85556bbe3a1c6947bc11b8ef028d4023) +Signed-off-by: Stoiko Ivanov +--- + module/os/linux/zfs/zvol_os.c | 121 ++++++++++++++++------------------ + 1 file changed, 58 insertions(+), 63 deletions(-) + +diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c +index 44caadd58..69479b3f7 100644 +--- a/module/os/linux/zfs/zvol_os.c ++++ b/module/os/linux/zfs/zvol_os.c +@@ -496,8 +496,7 @@ zvol_open(struct block_device *bdev, fmode_t flag) + { + zvol_state_t *zv; + int error = 0; +- boolean_t drop_suspend = B_TRUE; +- boolean_t drop_namespace = B_FALSE; ++ boolean_t drop_suspend = B_FALSE; + #ifndef HAVE_BLKDEV_GET_ERESTARTSYS + hrtime_t timeout = MSEC2NSEC(zvol_open_timeout_ms); + hrtime_t start = gethrtime(); +@@ -517,7 +516,36 @@ retry: + return (SET_ERROR(-ENXIO)); + } + +- if (zv->zv_open_count == 0 && !mutex_owned(&spa_namespace_lock)) { ++ mutex_enter(&zv->zv_state_lock); ++ /* ++ * Make sure zvol is not suspended during first open ++ * (hold zv_suspend_lock) and respect proper lock acquisition ++ * ordering - zv_suspend_lock before zv_state_lock ++ */ ++ if (zv->zv_open_count == 0) { ++ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { ++ mutex_exit(&zv->zv_state_lock); ++ rw_enter(&zv->zv_suspend_lock, RW_READER); ++ mutex_enter(&zv->zv_state_lock); ++ /* check to see if zv_suspend_lock is needed */ ++ if (zv->zv_open_count != 0) { ++ rw_exit(&zv->zv_suspend_lock); ++ } else { ++ drop_suspend = B_TRUE; ++ } ++ } else { ++ drop_suspend = B_TRUE; ++ } ++ } ++ rw_exit(&zvol_state_lock); ++ ++ ASSERT(MUTEX_HELD(&zv->zv_state_lock)); ++ ++ if (zv->zv_open_count 
== 0) { ++ boolean_t drop_namespace = B_FALSE; ++ ++ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock)); ++ + /* + * In all other call paths the spa_namespace_lock is taken + * before the bdev->bd_mutex lock. However, on open(2) +@@ -542,84 +570,51 @@ retry: + * the kernel so the only option is to return the error for + * the caller to handle it. + */ +- if (!mutex_tryenter(&spa_namespace_lock)) { +- rw_exit(&zvol_state_lock); ++ if (!mutex_owned(&spa_namespace_lock)) { ++ if (!mutex_tryenter(&spa_namespace_lock)) { ++ mutex_exit(&zv->zv_state_lock); ++ rw_exit(&zv->zv_suspend_lock); + + #ifdef HAVE_BLKDEV_GET_ERESTARTSYS +- schedule(); +- return (SET_ERROR(-ERESTARTSYS)); +-#else +- if ((gethrtime() - start) > timeout) ++ schedule(); + return (SET_ERROR(-ERESTARTSYS)); ++#else ++ if ((gethrtime() - start) > timeout) ++ return (SET_ERROR(-ERESTARTSYS)); + +- schedule_timeout(MSEC_TO_TICK(10)); +- goto retry; ++ schedule_timeout(MSEC_TO_TICK(10)); ++ goto retry; + #endif +- } else { +- drop_namespace = B_TRUE; +- } +- } +- +- mutex_enter(&zv->zv_state_lock); +- /* +- * make sure zvol is not suspended during first open +- * (hold zv_suspend_lock) and respect proper lock acquisition +- * ordering - zv_suspend_lock before zv_state_lock +- */ +- if (zv->zv_open_count == 0) { +- if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) { +- mutex_exit(&zv->zv_state_lock); +- rw_enter(&zv->zv_suspend_lock, RW_READER); +- mutex_enter(&zv->zv_state_lock); +- /* check to see if zv_suspend_lock is needed */ +- if (zv->zv_open_count != 0) { +- rw_exit(&zv->zv_suspend_lock); +- drop_suspend = B_FALSE; ++ } else { ++ drop_namespace = B_TRUE; + } + } +- } else { +- drop_suspend = B_FALSE; +- } +- rw_exit(&zvol_state_lock); +- +- ASSERT(MUTEX_HELD(&zv->zv_state_lock)); + +- if (zv->zv_open_count == 0) { +- ASSERT(RW_READ_HELD(&zv->zv_suspend_lock)); + error = -zvol_first_open(zv, !(flag & FMODE_WRITE)); +- if (error) +- goto out_mutex; +- } + +- if ((flag & FMODE_WRITE) && (zv->zv_flags & 
ZVOL_RDONLY)) { +- error = -EROFS; +- goto out_open_count; ++ if (drop_namespace) ++ mutex_exit(&spa_namespace_lock); + } + +- zv->zv_open_count++; +- +- mutex_exit(&zv->zv_state_lock); +- if (drop_namespace) +- mutex_exit(&spa_namespace_lock); +- if (drop_suspend) +- rw_exit(&zv->zv_suspend_lock); +- +- zfs_check_media_change(bdev); +- +- return (0); ++ if (error == 0) { ++ if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) { ++ if (zv->zv_open_count == 0) ++ zvol_last_close(zv); + +-out_open_count: +- if (zv->zv_open_count == 0) +- zvol_last_close(zv); ++ error = SET_ERROR(-EROFS); ++ } else { ++ zv->zv_open_count++; ++ } ++ } + +-out_mutex: + mutex_exit(&zv->zv_state_lock); +- if (drop_namespace) +- mutex_exit(&spa_namespace_lock); + if (drop_suspend) + rw_exit(&zv->zv_suspend_lock); + +- return (SET_ERROR(error)); ++ if (error == 0) ++ zfs_check_media_change(bdev); ++ ++ return (error); + } + + static void