From 08ff1a6e34dbca550b2736eaf2599460c59ebfc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastian=20K=C3=B6cher?= Date: Thu, 15 Dec 2022 10:53:15 +0100 Subject: [PATCH 1/5] Fix missing block number issue on forced canonicalization There is this issue about missing block numbers on forced canonicalization. I looked over the code now 10000 times and there are possible ways this can be triggered, but I don't really know how this is triggered. So, this pr is going to solve the symptom and not the cause. The block number to hash mapping is set when we import a new best block. Forced canonicalization will now stop at the best block and it will canonicalize the other blocks later when the best block moved. As the error reports indicated that this issue mainly happened on major sync, there should not be any forks, so not doing the canonicalization directly shouldn't be that harmful. All known implementations should import all blocks as best block on major sync anyway (I mean somewhere there is the bug, but I didn't yet found it). I will also do some changes to Cumulus around some potential culprit for this issue. Closes: https://github.com/paritytech/substrate/issues/12613 --- client/db/src/lib.rs | 146 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 133 insertions(+), 13 deletions(-) diff --git a/client/db/src/lib.rs b/client/db/src/lib.rs index 4452d5dcb3f29..e06e78cdf68a4 100644 --- a/client/db/src/lib.rs +++ b/client/db/src/lib.rs @@ -1213,9 +1213,8 @@ impl Backend { let meta = self.blockchain.meta.read(); - if meta.best_number > best_number && - (meta.best_number - best_number).saturated_into::() > - self.canonicalization_delay + if meta.best_number.saturating_sub(best_number).saturated_into::() > + self.canonicalization_delay { return Err(sp_blockchain::Error::SetHeadTooOld) } @@ -1320,31 +1319,55 @@ impl Backend { number: NumberFor, ) -> ClientResult<()> { let number_u64 = number.saturated_into::(); - if number_u64 > self.canonicalization_delay { - let new_canonical = number_u64 - self.canonicalization_delay; + if number_u64 <= self.canonicalization_delay { + return Ok(()) + } - if new_canonical <= self.storage.state_db.best_canonical().unwrap_or(0) { - return Ok(()) - } - let hash = if new_canonical == number_u64 { + let new_canonical = number_u64 - self.canonicalization_delay; + let best_canonical = self.storage.state_db.best_canonical().unwrap_or(0); + + if new_canonical <= best_canonical { + return Ok(()) + } + + let best_number = self.blockchain.info().best_number.saturated_into(); + + // We can not canonicalize beyond the `best_number` as setting the best block also sets the + // mapping from block number to hash that is required down below. This is just some safety + // guard against potential bugs. + let last_to_canonicalize = std::cmp::min(best_number, new_canonical); + + if new_canonical > best_number { + trace!( + target: "db", + "Block to force canonicalize (#{new_canonical}) is above the best block (#{best_number})", + ); + } + + // As there could be some gap between the point when we last canonicalized and the block we + // want to canonicalize (`new_canonical`), we canonicalize from the best canonicalized block + // to the last block to canonicalize. + for to_canonicalize in best_canonical + 1..=last_to_canonicalize { + let hash = if to_canonicalize == number_u64 { hash } else { sc_client_api::blockchain::HeaderBackend::hash( &self.blockchain, - new_canonical.saturated_into(), + to_canonicalize.saturated_into(), )? .ok_or_else(|| { sp_blockchain::Error::Backend(format!( "Can't canonicalize missing block number #{} when importing {:?} (#{})", - new_canonical, hash, number, + to_canonicalize, hash, number, )) })? }; - if !sc_client_api::Backend::have_state_at(self, hash, new_canonical.saturated_into()) { + if !sc_client_api::Backend::have_state_at(self, hash, to_canonicalize.saturated_into()) + { return Ok(()) } - trace!(target: "db", "Canonicalize block #{} ({:?})", new_canonical, hash); + trace!(target: "db", "Canonicalize block #{} ({:?})", to_canonicalize, hash); let commit = self.storage.state_db.canonicalize_block(&hash).map_err( sp_blockchain::Error::from_state_db::< sc_state_db::Error, @@ -1352,6 +1375,7 @@ impl Backend { )?; apply_state_commit(transaction, commit); } + Ok(()) } @@ -3801,4 +3825,100 @@ pub(crate) mod tests { assert_eq!(backend.blockchain().leaves().unwrap(), vec![block2]); assert_eq!(backend.blockchain().info().best_hash, block2); } + + #[test] + fn force_delayed_canonicalize_waiting_for_blocks_to_be_finalized() { + let backend = Backend::::new_test(10, 1); + + let genesis = + insert_block(&backend, 0, Default::default(), None, Default::default(), vec![], None) + .unwrap(); + + let block1 = { + let mut op = backend.begin_operation().unwrap(); + backend.begin_state_operation(&mut op, genesis).unwrap(); + let header = Header { + number: 1, + parent_hash: genesis, + state_root: BlakeTwo256::trie_root(Vec::new(), StateVersion::V1), + digest: Default::default(), + extrinsics_root: Default::default(), + }; + + op.set_block_data(header.clone(), Some(Vec::new()), None, None, NewBlockState::Normal) + .unwrap(); + + backend.commit_operation(op).unwrap(); + + header.hash() + }; + + assert_eq!(0, backend.storage.state_db.best_canonical().unwrap()); + + let block2 = { + let mut op = backend.begin_operation().unwrap(); + backend.begin_state_operation(&mut op, block1).unwrap(); + let header = Header { + number: 2, + parent_hash: block1, + state_root: BlakeTwo256::trie_root(Vec::new(), StateVersion::V1), + digest: Default::default(), + extrinsics_root: Default::default(), + }; + + op.set_block_data(header.clone(), Some(Vec::new()), None, None, NewBlockState::Normal) + .unwrap(); + + backend.commit_operation(op).unwrap(); + + header.hash() + }; + + assert_eq!(0, backend.storage.state_db.best_canonical().unwrap()); + + let block3 = { + let mut op = backend.begin_operation().unwrap(); + backend.begin_state_operation(&mut op, block2).unwrap(); + let header = Header { + number: 3, + parent_hash: block2, + state_root: BlakeTwo256::trie_root(Vec::new(), StateVersion::V1), + digest: Default::default(), + extrinsics_root: Default::default(), + }; + + op.set_block_data(header.clone(), Some(Vec::new()), None, None, NewBlockState::Best) + .unwrap(); + + backend.commit_operation(op).unwrap(); + + header.hash() + }; + + let block4 = { + let mut op = backend.begin_operation().unwrap(); + backend.begin_state_operation(&mut op, block3).unwrap(); + let header = Header { + number: 4, + parent_hash: block3, + state_root: BlakeTwo256::trie_root(Vec::new(), StateVersion::V1), + digest: Default::default(), + extrinsics_root: Default::default(), + }; + + op.set_block_data(header.clone(), Some(Vec::new()), None, None, NewBlockState::Best) + .unwrap(); + + backend.commit_operation(op).unwrap(); + + header.hash() + }; + + assert_eq!(3, backend.storage.state_db.best_canonical().unwrap()); + + assert_eq!(block1, backend.blockchain().hash(1).unwrap().unwrap()); + assert_eq!(block2, backend.blockchain().hash(2).unwrap().unwrap()); + assert_eq!(block3, backend.blockchain().hash(3).unwrap().unwrap()); + assert_eq!(block4, backend.blockchain().hash(4).unwrap().unwrap()); + } } From 1ffbbb01aca8279809e3ccc684ad5cf83e582ac3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastian=20K=C3=B6cher?= Date: Fri, 16 Dec 2022 00:10:33 +0100 Subject: [PATCH 2/5] Add some docs --- client/db/src/lib.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/client/db/src/lib.rs b/client/db/src/lib.rs index e06e78cdf68a4..78d2d00bb0bd2 100644 --- a/client/db/src/lib.rs +++ b/client/db/src/lib.rs @@ -3855,6 +3855,8 @@ pub(crate) mod tests { assert_eq!(0, backend.storage.state_db.best_canonical().unwrap()); + // This should not trigger any forced canonicalization as we didn't have imported any best + // block by now. let block2 = { let mut op = backend.begin_operation().unwrap(); backend.begin_state_operation(&mut op, block1).unwrap(); @@ -3876,6 +3878,8 @@ pub(crate) mod tests { assert_eq!(0, backend.storage.state_db.best_canonical().unwrap()); + // This should also not trigger it yet, because we import a best block, but the best block + // from the POV of the db is still at `0`. let block3 = { let mut op = backend.begin_operation().unwrap(); backend.begin_state_operation(&mut op, block2).unwrap(); @@ -3895,6 +3899,7 @@ pub(crate) mod tests { header.hash() }; + // Now it should kick in. let block4 = { let mut op = backend.begin_operation().unwrap(); backend.begin_state_operation(&mut op, block3).unwrap(); From dc607869cd4a101e2e7cb31d3d3844b62028d6d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastian=20K=C3=B6cher?= Date: Fri, 16 Dec 2022 09:37:44 +0100 Subject: [PATCH 3/5] Fix fix --- client/db/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/client/db/src/lib.rs b/client/db/src/lib.rs index 78d2d00bb0bd2..389413f0d5ebb 100644 --- a/client/db/src/lib.rs +++ b/client/db/src/lib.rs @@ -1331,6 +1331,10 @@ impl Backend { } let best_number = self.blockchain.info().best_number.saturated_into(); + // If the `best_number` is one off from the current block we are just importing, we can + // take the block number of the current block as the best block. Even if this block is not + // imported as best block, we know its hash and can canonicalize it if required below. + let best_number = if best_number + 1 == number_u64 { number_u64 } else { best_number }; // We can not canonicalize beyond the `best_number` as setting the best block also sets the // mapping from block number to hash that is required down below. This is just some safety From b59a9a057a4a3f3926ebf33d259102b2a20ee1d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastian=20K=C3=B6cher?= Date: Fri, 16 Dec 2022 12:16:33 +0100 Subject: [PATCH 4/5] Review comments --- client/db/src/lib.rs | 108 +++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/client/db/src/lib.rs b/client/db/src/lib.rs index 389413f0d5ebb..ee65fd254490d 100644 --- a/client/db/src/lib.rs +++ b/client/db/src/lib.rs @@ -1315,64 +1315,36 @@ impl Backend { fn force_delayed_canonicalize( &self, transaction: &mut Transaction, - hash: Block::Hash, - number: NumberFor, ) -> ClientResult<()> { - let number_u64 = number.saturated_into::(); - if number_u64 <= self.canonicalization_delay { - return Ok(()) - } - - let new_canonical = number_u64 - self.canonicalization_delay; - let best_canonical = self.storage.state_db.best_canonical().unwrap_or(0); - - if new_canonical <= best_canonical { - return Ok(()) - } - - let best_number = self.blockchain.info().best_number.saturated_into(); - // If the `best_number` is one off from the current block we are just importing, we can - // take the block number of the current block as the best block. Even if this block is not - // imported as best block, we know its hash and can canonicalize it if required below. - let best_number = if best_number + 1 == number_u64 { number_u64 } else { best_number }; + let best_canonical = || self.storage.state_db.best_canonical().unwrap_or(0); + let info = self.blockchain.info(); + let best_number: u64 = self.blockchain.info().best_number.saturated_into(); + let best_hash = info.best_hash; - // We can not canonicalize beyond the `best_number` as setting the best block also sets the - // mapping from block number to hash that is required down below. This is just some safety - // guard against potential bugs. - let last_to_canonicalize = std::cmp::min(best_number, new_canonical); + while best_number.saturating_sub(best_canonical()) > self.canonicalization_delay { + let to_canonicalize = best_canonical() + 1; - if new_canonical > best_number { - trace!( - target: "db", - "Block to force canonicalize (#{new_canonical}) is above the best block (#{best_number})", - ); - } - - // As there could be some gap between the point when we last canonicalized and the block we - // want to canonicalize (`new_canonical`), we canonicalize from the best canonicalized block - // to the last block to canonicalize. - for to_canonicalize in best_canonical + 1..=last_to_canonicalize { - let hash = if to_canonicalize == number_u64 { - hash - } else { - sc_client_api::blockchain::HeaderBackend::hash( - &self.blockchain, - to_canonicalize.saturated_into(), - )? - .ok_or_else(|| { - sp_blockchain::Error::Backend(format!( - "Can't canonicalize missing block number #{} when importing {:?} (#{})", - to_canonicalize, hash, number, - )) - })? - }; - if !sc_client_api::Backend::have_state_at(self, hash, to_canonicalize.saturated_into()) - { + let hash_to_canonicalize = sc_client_api::blockchain::HeaderBackend::hash( + &self.blockchain, + to_canonicalize.saturated_into(), + )? + .ok_or_else(|| { + sp_blockchain::Error::Backend(format!( + "Can't canonicalize missing block number #{} when for best block {:?} (#{})", + to_canonicalize, best_hash, best_number, + )) + })?; + + if !sc_client_api::Backend::have_state_at( + self, + hash_to_canonicalize, + to_canonicalize.saturated_into(), + ) { return Ok(()) } - trace!(target: "db", "Canonicalize block #{} ({:?})", to_canonicalize, hash); - let commit = self.storage.state_db.canonicalize_block(&hash).map_err( + trace!(target: "db", "Canonicalize block #{} ({:?})", to_canonicalize, hash_to_canonicalize); + let commit = self.storage.state_db.canonicalize_block(&hash_to_canonicalize).map_err( sp_blockchain::Error::from_state_db::< sc_state_db::Error, >, @@ -1587,7 +1559,7 @@ impl Backend { )?; } else { // canonicalize blocks which are old enough, regardless of finality. - self.force_delayed_canonicalize(&mut transaction, hash, *header.number())? + self.force_delayed_canonicalize(&mut transaction)? } if !existing_header { @@ -2787,6 +2759,33 @@ pub(crate) mod tests { .into(); let hash = header.hash(); + op.set_block_data(header, Some(vec![]), None, None, NewBlockState::Best) + .unwrap(); + + backend.commit_operation(op).unwrap(); + hash + }; + + let hashof4 = { + let mut op = backend.begin_operation().unwrap(); + backend.begin_state_operation(&mut op, hashof3).unwrap(); + let mut header = Header { + number: 4, + parent_hash: hashof3, + state_root: Default::default(), + digest: Default::default(), + extrinsics_root: Default::default(), + }; + + let storage: Vec<(_, _)> = vec![]; + + header.state_root = op + .old_state + .storage_root(storage.iter().cloned().map(|(x, y)| (x, Some(y))), state_version) + .0 + .into(); + let hash = header.hash(); + op.set_block_data(header, Some(vec![]), None, None, NewBlockState::Best) .unwrap(); @@ -2802,6 +2801,7 @@ pub(crate) mod tests { backend.finalize_block(hashof1, None).unwrap(); backend.finalize_block(hashof2, None).unwrap(); backend.finalize_block(hashof3, None).unwrap(); + backend.finalize_block(hashof4, None).unwrap(); assert!(backend .storage .db @@ -3923,7 +3923,7 @@ pub(crate) mod tests { header.hash() }; - assert_eq!(3, backend.storage.state_db.best_canonical().unwrap()); + assert_eq!(2, backend.storage.state_db.best_canonical().unwrap()); assert_eq!(block1, backend.blockchain().hash(1).unwrap().unwrap()); assert_eq!(block2, backend.blockchain().hash(2).unwrap().unwrap()); From 71b87a6b0f7327723c965c53e16d5bc8ece6c927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastian=20K=C3=B6cher?= Date: Fri, 16 Dec 2022 13:51:35 +0100 Subject: [PATCH 5/5] Review comments --- client/db/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/db/src/lib.rs b/client/db/src/lib.rs index ee65fd254490d..7c2635f843418 100644 --- a/client/db/src/lib.rs +++ b/client/db/src/lib.rs @@ -1319,7 +1319,6 @@ impl Backend { let best_canonical = || self.storage.state_db.best_canonical().unwrap_or(0); let info = self.blockchain.info(); let best_number: u64 = self.blockchain.info().best_number.saturated_into(); - let best_hash = info.best_hash; while best_number.saturating_sub(best_canonical()) > self.canonicalization_delay { let to_canonicalize = best_canonical() + 1; @@ -1329,9 +1328,10 @@ impl Backend { to_canonicalize.saturated_into(), )? .ok_or_else(|| { + let best_hash = info.best_hash; + sp_blockchain::Error::Backend(format!( - "Can't canonicalize missing block number #{} when for best block {:?} (#{})", - to_canonicalize, best_hash, best_number, + "Can't canonicalize missing block number #{to_canonicalize} when for best block {best_hash:?} (#{best_number})", )) })?;