Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

dispute-coordinator: past session dispute slashing #6811

Merged
merged 76 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
76 commits
Select commit Hold shift + click to select a range
8dbeae7
runtime/vstaging: unapplied_slashes runtime API
ordian Feb 1, 2023
d3273d7
runtime/vstaging: key_ownership_proof runtime API
ordian Feb 2, 2023
8394592
runtime/ParachainHost: submit_report_dispute_lost
ordian Feb 2, 2023
d31edbb
fix key_ownership_proof API
ordian Feb 2, 2023
5808d0b
runtime: submit_report_dispute_lost runtime API
ordian Feb 3, 2023
78d9da0
nits
ordian Feb 3, 2023
7caf651
Update node/subsystem-types/src/messages.rs
ordian Feb 5, 2023
ab08d67
merge master and resolve ExecutorParams conflicts
ordian Feb 27, 2023
62e3831
revert unrelated fmt changes
ordian Feb 27, 2023
5223ef3
dispute-coordinator: past session dispute slashing
ordian Mar 2, 2023
af6921d
encapsule runtime api call for submitting report
ordian Mar 5, 2023
9d4e21f
prettify: extract into a function
ordian Mar 5, 2023
39def6a
do not exit on runtime api error
ordian Mar 6, 2023
1495b47
fix tests
ordian Mar 7, 2023
5046c2d
Merge branch 'master' into ao-past-session-slashing-client
ordian Mar 7, 2023
f2119e4
Merge branch 'master' into ao-past-session-slashing-runtime
ordian Mar 7, 2023
a062b6c
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian Mar 7, 2023
eca1786
try initial zombienet test
ordian Mar 7, 2023
13b6271
try something
ordian Mar 7, 2023
77ec15a
fix a typo
ordian Mar 7, 2023
49cb564
try cumulus-based collator
ordian Mar 7, 2023
288696e
fix clippy
ordian Mar 9, 2023
36d60d9
build polkadot-debug images with fast-runtime enabled
ordian Mar 9, 2023
35d76d3
merge master, resolve conflicts, bump to v5
ordian Mar 16, 2023
b3b094c
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian Mar 16, 2023
1494ff9
wip
ordian Mar 21, 2023
aafc9b0
runtime/inclusion: fix availability_threshold
ordian Mar 21, 2023
dc6452e
fix wip
ordian Mar 21, 2023
e90a31f
Merge branch 'master' into ao-past-session-slashing-client
ordian Mar 21, 2023
b8e8c24
fix wip II
ordian Mar 21, 2023
4dce060
revert native provider
ordian Mar 21, 2023
a150569
Merge branch 'master' into ao-past-session-slashing-runtime
ordian Mar 21, 2023
58ea172
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian Mar 21, 2023
92fe5b4
propagate tx submission error
ordian Mar 23, 2023
ed759e6
Merge branch 'master' into ao-past-session-slashing-client
ordian Mar 23, 2023
fbc0e9d
Merge branch 'master' into ao-past-session-slashing-runtime
ordian Mar 23, 2023
e48615e
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian Mar 23, 2023
cc96d7f
DEBUG: sync=trace
ordian Mar 24, 2023
e9dc42b
print key ownership proof len
ordian Mar 28, 2023
176c7b2
Merge branch 'master' into ao-past-session-slashing-client
ordian Mar 28, 2023
92b13c4
panic repro
ordian Mar 29, 2023
0f3a964
log validator index in panic message
ordian Mar 29, 2023
2ba635b
Merge branch 'master' into ao-past-session-slashing-runtime
ordian Mar 29, 2023
55c7947
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian Mar 29, 2023
ef78e51
post merge fixes
ordian Mar 29, 2023
e7b5a79
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian Mar 29, 2023
4f660d1
Merge branch 'master' into ao-past-session-slashing-runtime
ordian Apr 12, 2023
89a1a16
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian Apr 12, 2023
3db92ba
replace debug assertion with a log
ordian Apr 24, 2023
1edd122
Merge branch 'master' into ao-past-session-slashing-client
ordian Apr 24, 2023
2b57c83
fix compilation
ordian Apr 24, 2023
a70fbc5
Let's log the dispatch info in validate block.
bkchr Apr 26, 2023
63871ac
fix double encoding
ordian Apr 26, 2023
0196b63
Merge branch 'ao-past-session-slashing-client' of github.com:parityte…
ordian Apr 26, 2023
6a4f7b3
Revert "Let's log the dispatch info in validate block."
ordian Apr 26, 2023
63e3885
Revert "Let's log the dispatch info in validate block."
bkchr Apr 26, 2023
029796a
Merge branch 'master' into ao-past-session-slashing-client
ordian Apr 26, 2023
2c3f827
Merge branch 'ao-past-session-slashing-client' of github.com:parityte…
ordian Apr 26, 2023
a58b144
Merge branch 'master' into ao-past-session-slashing-runtime
ordian Apr 26, 2023
e6a14c8
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian Apr 26, 2023
bce0379
fix compilation
ordian Apr 24, 2023
aa3247e
update to latest zombienet and fix test
ordian May 22, 2023
d6a6f34
lower finality lag to 11
ordian May 22, 2023
c95ad77
bump zombienet again
ordian May 22, 2023
b39be9a
add a workaround, but still does not work
ordian May 22, 2023
a767717
Update .gitlab-ci.yml
pepoviola May 23, 2023
f45594d
Merge branch 'master' into ao-past-session-slashing-runtime
ordian May 23, 2023
4bd06fb
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian May 23, 2023
cfe4be4
add a comment and search logs on all nodes
ordian May 24, 2023
9cb0723
Merge branch 'master' into ao-past-session-slashing-runtime
ordian May 24, 2023
1ae22c5
Merge branch 'master' into ao-past-session-slashing-runtime
ordian May 24, 2023
f2bcd9b
Merge branch 'master' into ao-past-session-slashing-runtime
ordian May 25, 2023
8bb8e07
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian May 25, 2023
f621e4a
Merge branch 'master' into ao-past-session-slashing-runtime
ordian May 25, 2023
92c5228
Merge branch 'ao-past-session-slashing-runtime' into ao-past-session-…
ordian May 25, 2023
02fd54d
Merge branch 'master' into ao-past-session-slashing-client
ordian May 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 141 additions & 4 deletions node/core/dispute-coordinator/src/initialized.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use futures::{

use sc_keystore::LocalKeystore;

use parity_scale_codec::Encode;
use polkadot_node_primitives::{
disputes::ValidCandidateVotes, CandidateVotes, DisputeStatus, SignedDisputeStatement, Timestamp,
};
Expand All @@ -35,11 +36,12 @@ use polkadot_node_subsystem::{
},
overseer, ActivatedLeaf, ActiveLeavesUpdate, FromOrchestra, OverseerSignal,
};
use polkadot_node_subsystem_util::rolling_session_window::{
RollingSessionWindow, SessionWindowUpdate, SessionsUnavailable,
use polkadot_node_subsystem_util::{
rolling_session_window::{RollingSessionWindow, SessionWindowUpdate, SessionsUnavailable},
runtime::{key_ownership_proof, submit_report_dispute_lost},
};
use polkadot_primitives::{
BlockNumber, CandidateHash, CandidateReceipt, CompactStatement, DisputeStatement,
vstaging, BlockNumber, CandidateHash, CandidateReceipt, CompactStatement, DisputeStatement,
DisputeStatementSet, Hash, ScrapedOnChainVotes, SessionIndex, SessionInfo,
ValidDisputeStatementKind, ValidatorId, ValidatorIndex,
};
Expand All @@ -49,6 +51,7 @@ use crate::{
import::{CandidateEnvironment, CandidateVoteState},
is_potential_spam,
metrics::Metrics,
scraping::ScrapedUpdates,
status::{get_active_with_status, Clock},
DisputeCoordinatorSubsystem, LOG_TARGET,
};
Expand Down Expand Up @@ -314,9 +317,12 @@ impl Initialized {
Ok(SessionWindowUpdate::Unchanged) => {},
};

let ScrapedUpdates { unapplied_slashes, on_chain_votes, .. } = scraped_updates;

self.process_unapplied_slashes(ctx, new_leaf.hash, unapplied_slashes).await;
// The `runtime-api` subsystem has an internal queue which serializes the execution,
// so there is no point in running these in parallel.
for votes in scraped_updates.on_chain_votes {
for votes in on_chain_votes {
let _ = self.process_on_chain_votes(ctx, overlay_db, votes, now).await.map_err(
|error| {
gum::warn!(
Expand All @@ -332,6 +338,137 @@ impl Initialized {
Ok(())
}

/// For each unapplied (past-session) slash, report an unsigned extrinsic
/// to the runtime.
async fn process_unapplied_slashes<Context>(
&mut self,
ctx: &mut Context,
relay_parent: Hash,
unapplied_slashes: Vec<(SessionIndex, CandidateHash, vstaging::slashing::PendingSlashes)>,
) {
for (session_index, candidate_hash, pending) in unapplied_slashes {
gum::info!(
target: LOG_TARGET,
?session_index,
?candidate_hash,
n_slashes = pending.keys.len(),
"Processing unapplied validator slashes",
);

let inclusions = self.scraper.get_blocks_including_candidate(&candidate_hash);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We only keep that information for the unfinalized chain (and a bit more). Is that enough for this purpose?

Copy link
Member Author

@ordian ordian May 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. For the scraped included blocks this is fine. If we finalized a different block at the same height, that probably means the dispute has concluded before, so we should have called this already on some block producing node and DISPUTE_CANDIDATE_LIFETIME_AFTER_FINALIZATION helps with this race condition.

There's a potential problem with runtime API calls to key_ownership_proof though since this block will be pruned once we reorg to and finalize the undisputed chain. But again, there's a time window between dispute concluding and that happening, so we should be able to slash with some high probability in that case.

Copy link
Member

@eskimor eskimor May 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am uneasy about this. The raciness for participation already bothers me, but here it is even worse. I am not blocking this PR on this, as racy past session slashing is better than no past session slashing, but let's discuss this further.

debug_assert!(
!inclusions.is_empty(),
"Couldn't find inclusion parent for an unapplied slash",
);

// Find the first inclusion parent that we can use
// to generate key ownership proof on.
// We use inclusion parents because of the proper session index.
let mut key_ownership_proofs = Vec::new();
let mut dispute_proofs = Vec::new();

for (_height, inclusion_parent) in inclusions {
for (validator_index, validator_id) in pending.keys.iter() {
let res =
key_ownership_proof(ctx.sender(), inclusion_parent, validator_id.clone())
tdimitrov marked this conversation as resolved.
Show resolved Hide resolved
.await;

match res {
Ok(Some(key_ownership_proof)) => {
key_ownership_proofs.push(key_ownership_proof);
let time_slot = vstaging::slashing::DisputesTimeSlot::new(
session_index,
candidate_hash.clone(),
);
let dispute_proof = vstaging::slashing::DisputeProof {
time_slot,
kind: pending.kind,
validator_index: *validator_index,
validator_id: validator_id.clone(),
};
dispute_proofs.push(dispute_proof);
},
Ok(None) => {},
Err(error) => {
gum::debug!(
target: LOG_TARGET,
?error,
?session_index,
?candidate_hash,
?validator_id,
"Could not generate key ownership proof",
);
},
}
}

if !key_ownership_proofs.is_empty() {
ordian marked this conversation as resolved.
Show resolved Hide resolved
debug_assert_eq!(key_ownership_proofs.len(), pending.keys.len());
break
}
}

let expected_keys = pending.keys.len();
let resolved_keys = key_ownership_proofs.len();
if resolved_keys < expected_keys {
gum::warn!(
target: LOG_TARGET,
?session_index,
?candidate_hash,
"Could not generate key ownership proofs for {} keys",
expected_keys - resolved_keys,
);
}
debug_assert_eq!(resolved_keys, dispute_proofs.len());

for (key_ownership_proof, dispute_proof) in
key_ownership_proofs.into_iter().zip(dispute_proofs.into_iter())
{
let validator_id = dispute_proof.validator_id.clone();
let opaque_key_ownership_proof =
vstaging::slashing::OpaqueKeyOwnershipProof::new(key_ownership_proof.encode());

let res = submit_report_dispute_lost(
ctx.sender(),
relay_parent,
dispute_proof,
opaque_key_ownership_proof,
)
.await;

match res {
Err(error) => {
gum::warn!(
target: LOG_TARGET,
?error,
?session_index,
?candidate_hash,
"Error reporting pending slash",
);
},
Ok(Some(())) => {
gum::info!(
target: LOG_TARGET,
?session_index,
?candidate_hash,
?validator_id,
"Successfully reported pending slash",
);
},
Ok(None) => {
gum::debug!(
target: LOG_TARGET,
?session_index,
?candidate_hash,
?validator_id,
"Duplicate pending slash report",
);
},
}
}
}
}

/// Scrapes on-chain votes (backing votes and concluded disputes) for an active leaf of the
/// relay chain.
async fn process_on_chain_votes<Context>(
Expand Down
23 changes: 18 additions & 5 deletions node/core/dispute-coordinator/src/scraping/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@ use polkadot_node_subsystem::{
messages::ChainApiMessage, overseer, ActivatedLeaf, ActiveLeavesUpdate, ChainApiError,
SubsystemSender,
};
use polkadot_node_subsystem_util::runtime::{get_candidate_events, get_on_chain_votes};
use polkadot_node_subsystem_util::runtime::{
get_candidate_events, get_on_chain_votes, get_unapplied_slashes,
};
use polkadot_primitives::{
BlockNumber, CandidateEvent, CandidateHash, CandidateReceipt, Hash, ScrapedOnChainVotes,
vstaging, BlockNumber, CandidateEvent, CandidateHash, CandidateReceipt, Hash,
ScrapedOnChainVotes, SessionIndex,
};

use crate::{
Expand Down Expand Up @@ -64,11 +67,16 @@ const LRU_OBSERVED_BLOCKS_CAPACITY: NonZeroUsize = match NonZeroUsize::new(20) {
pub struct ScrapedUpdates {
pub on_chain_votes: Vec<ScrapedOnChainVotes>,
pub included_receipts: Vec<CandidateReceipt>,
pub unapplied_slashes: Vec<(SessionIndex, CandidateHash, vstaging::slashing::PendingSlashes)>,
ordian marked this conversation as resolved.
Show resolved Hide resolved
}

impl ScrapedUpdates {
pub fn new() -> Self {
Self { on_chain_votes: Vec::new(), included_receipts: Vec::new() }
Self {
on_chain_votes: Vec::new(),
included_receipts: Vec::new(),
unapplied_slashes: Vec::new(),
}
}
}

Expand Down Expand Up @@ -120,7 +128,7 @@ impl Inclusions {
.retain(|_, blocks_including| blocks_including.keys().len() > 0);
}

pub fn get(&mut self, candidate: &CandidateHash) -> Vec<(BlockNumber, Hash)> {
pub fn get(&self, candidate: &CandidateHash) -> Vec<(BlockNumber, Hash)> {
let mut inclusions_as_vec: Vec<(BlockNumber, Hash)> = Vec::new();
if let Some(blocks_including) = self.inclusions_inner.get(candidate) {
for (height, blocks_at_height) in blocks_including.iter() {
Expand Down Expand Up @@ -252,6 +260,11 @@ impl ChainScraper {
}
}

// for unapplied slashes, we only look at the latest activated hash,
// it should accumulate them all
let unapplied_slashes = get_unapplied_slashes(sender, activated.hash).await?;
scraped_updates.unapplied_slashes = unapplied_slashes;

self.last_observed_blocks.put(activated.hash, ());

Ok(scraped_updates)
Expand Down Expand Up @@ -396,7 +409,7 @@ impl ChainScraper {
}

pub fn get_blocks_including_candidate(
&mut self,
&self,
candidate: &CandidateHash,
) -> Vec<(BlockNumber, Hash)> {
self.inclusions.get(candidate)
Expand Down
72 changes: 62 additions & 10 deletions node/core/runtime-api/src/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ use lru::LruCache;
use sp_consensus_babe::Epoch;

use polkadot_primitives::{
vstaging::ExecutorParams, AuthorityDiscoveryId, BlockNumber, CandidateCommitments,
CandidateEvent, CandidateHash, CommittedCandidateReceipt, CoreState, DisputeState,
GroupRotationInfo, Hash, Id as ParaId, InboundDownwardMessage, InboundHrmpMessage,
OccupiedCoreAssumption, PersistedValidationData, PvfCheckStatement, ScrapedOnChainVotes,
SessionIndex, SessionInfo, ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex,
ValidatorSignature,
vstaging, AuthorityDiscoveryId, BlockNumber, CandidateCommitments, CandidateEvent,
CandidateHash, CommittedCandidateReceipt, CoreState, DisputeState, GroupRotationInfo, Hash,
Id as ParaId, InboundDownwardMessage, InboundHrmpMessage, OccupiedCoreAssumption,
PersistedValidationData, PvfCheckStatement, ScrapedOnChainVotes, SessionIndex, SessionInfo,
ValidationCode, ValidationCodeHash, ValidatorId, ValidatorIndex, ValidatorSignature,
};

/// For consistency we have the same capacity for all caches. We use 128 as we'll only need that
Expand All @@ -52,7 +51,7 @@ pub(crate) struct RequestResultCache {
validation_code_by_hash: LruCache<ValidationCodeHash, Option<ValidationCode>>,
candidate_pending_availability: LruCache<(Hash, ParaId), Option<CommittedCandidateReceipt>>,
candidate_events: LruCache<Hash, Vec<CandidateEvent>>,
session_executor_params: LruCache<SessionIndex, Option<ExecutorParams>>,
session_executor_params: LruCache<SessionIndex, Option<vstaging::ExecutorParams>>,
session_info: LruCache<SessionIndex, SessionInfo>,
dmq_contents: LruCache<(Hash, ParaId), Vec<InboundDownwardMessage<BlockNumber>>>,
inbound_hrmp_channels_contents:
Expand All @@ -64,6 +63,10 @@ pub(crate) struct RequestResultCache {
LruCache<(Hash, ParaId, OccupiedCoreAssumption), Option<ValidationCodeHash>>,
version: LruCache<Hash, u32>,
disputes: LruCache<Hash, Vec<(SessionIndex, CandidateHash, DisputeState<BlockNumber>)>>,
unapplied_slashes:
LruCache<Hash, Vec<(SessionIndex, CandidateHash, vstaging::slashing::PendingSlashes)>>,
key_ownership_proof:
LruCache<(Hash, ValidatorId), Option<vstaging::slashing::OpaqueKeyOwnershipProof>>,
}

impl Default for RequestResultCache {
Expand Down Expand Up @@ -91,6 +94,8 @@ impl Default for RequestResultCache {
validation_code_hash: LruCache::new(DEFAULT_CACHE_CAP),
version: LruCache::new(DEFAULT_CACHE_CAP),
disputes: LruCache::new(DEFAULT_CACHE_CAP),
unapplied_slashes: LruCache::new(DEFAULT_CACHE_CAP),
key_ownership_proof: LruCache::new(DEFAULT_CACHE_CAP),
}
}
}
Expand Down Expand Up @@ -269,14 +274,14 @@ impl RequestResultCache {
pub(crate) fn session_executor_params(
&mut self,
session_index: SessionIndex,
) -> Option<&Option<ExecutorParams>> {
) -> Option<&Option<vstaging::ExecutorParams>> {
self.session_executor_params.get(&session_index)
}

pub(crate) fn cache_session_executor_params(
&mut self,
session_index: SessionIndex,
value: Option<ExecutorParams>,
value: Option<vstaging::ExecutorParams>,
) {
self.session_executor_params.put(session_index, value);
}
Expand Down Expand Up @@ -386,6 +391,44 @@ impl RequestResultCache {
) {
self.disputes.put(relay_parent, value);
}

pub(crate) fn unapplied_slashes(
&mut self,
relay_parent: &Hash,
) -> Option<&Vec<(SessionIndex, CandidateHash, vstaging::slashing::PendingSlashes)>> {
self.unapplied_slashes.get(relay_parent)
}

pub(crate) fn cache_unapplied_slashes(
&mut self,
relay_parent: Hash,
value: Vec<(SessionIndex, CandidateHash, vstaging::slashing::PendingSlashes)>,
) {
self.unapplied_slashes.put(relay_parent, value);
}

pub(crate) fn key_ownership_proof(
&mut self,
key: (Hash, ValidatorId),
) -> Option<&Option<vstaging::slashing::OpaqueKeyOwnershipProof>> {
self.key_ownership_proof.get(&key)
}

pub(crate) fn cache_key_ownership_proof(
&mut self,
key: (Hash, ValidatorId),
value: Option<vstaging::slashing::OpaqueKeyOwnershipProof>,
) {
self.key_ownership_proof.put(key, value);
}

// This request is never cached, hence always returns `None`.
pub(crate) fn submit_report_dispute_lost(
&mut self,
_key: (Hash, vstaging::slashing::DisputeProof, vstaging::slashing::OpaqueKeyOwnershipProof),
) -> Option<&Option<()>> {
None
}
}

pub(crate) enum RequestResult {
Expand All @@ -407,7 +450,7 @@ pub(crate) enum RequestResult {
ValidationCodeByHash(Hash, ValidationCodeHash, Option<ValidationCode>),
CandidatePendingAvailability(Hash, ParaId, Option<CommittedCandidateReceipt>),
CandidateEvents(Hash, Vec<CandidateEvent>),
SessionExecutorParams(Hash, SessionIndex, Option<ExecutorParams>),
SessionExecutorParams(Hash, SessionIndex, Option<vstaging::ExecutorParams>),
SessionInfo(Hash, SessionIndex, Option<SessionInfo>),
DmqContents(Hash, ParaId, Vec<InboundDownwardMessage<BlockNumber>>),
InboundHrmpChannelsContents(
Expand All @@ -423,4 +466,13 @@ pub(crate) enum RequestResult {
ValidationCodeHash(Hash, ParaId, OccupiedCoreAssumption, Option<ValidationCodeHash>),
Version(Hash, u32),
Disputes(Hash, Vec<(SessionIndex, CandidateHash, DisputeState<BlockNumber>)>),
UnappliedSlashes(Hash, Vec<(SessionIndex, CandidateHash, vstaging::slashing::PendingSlashes)>),
KeyOwnershipProof(Hash, ValidatorId, Option<vstaging::slashing::OpaqueKeyOwnershipProof>),
// This is a request with side-effects.
SubmitReportDisputeLost(
Hash,
vstaging::slashing::DisputeProof,
vstaging::slashing::OpaqueKeyOwnershipProof,
Option<()>,
),
}
Loading