-
Notifications
You must be signed in to change notification settings - Fork 108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Signature consensus ratio config #1950
Changes from 5 commits
927db60
aca197f
94e97ee
52b06b1
5e34144
059eab9
cced080
bb652c3
6782926
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -23,32 +23,56 @@ | |||||
import com.google.common.collect.HashMultimap; | ||||||
import com.google.common.collect.Multimap; | ||||||
import com.google.common.collect.Sets; | ||||||
import io.micrometer.core.instrument.Counter; | ||||||
import io.micrometer.core.instrument.MeterRegistry; | ||||||
import java.security.PublicKey; | ||||||
import java.security.Signature; | ||||||
import java.util.Collection; | ||||||
import java.util.HashSet; | ||||||
import java.util.Map; | ||||||
import java.util.Set; | ||||||
import java.util.TreeSet; | ||||||
import java.util.concurrent.ConcurrentHashMap; | ||||||
import java.util.stream.Collectors; | ||||||
import javax.inject.Named; | ||||||
import lombok.RequiredArgsConstructor; | ||||||
import lombok.extern.log4j.Log4j2; | ||||||
|
||||||
import com.hedera.mirror.importer.addressbook.AddressBookService; | ||||||
import com.hedera.mirror.importer.domain.AddressBook; | ||||||
import com.hedera.mirror.importer.domain.EntityId; | ||||||
import com.hedera.mirror.importer.domain.EntityTypeEnum; | ||||||
import com.hedera.mirror.importer.domain.FileStreamSignature; | ||||||
import com.hedera.mirror.importer.domain.FileStreamSignature.SignatureStatus; | ||||||
import com.hedera.mirror.importer.exception.SignatureVerificationException; | ||||||
|
||||||
@Named | ||||||
@Log4j2 | ||||||
@RequiredArgsConstructor | ||||||
public class NodeSignatureVerifier { | ||||||
|
||||||
private final AddressBookService addressBookService; | ||||||
private final CommonDownloaderProperties commonDownloaderProperties; | ||||||
|
||||||
// Metrics | ||||||
private final MeterRegistry meterRegistry; | ||||||
private final Map<EntityId, Counter> metricMap = new ConcurrentHashMap<>(); | ||||||
private final Counter.Builder missingNodeSignatureFileMetric; | ||||||
private final Counter.Builder signatureVerificationMetric; | ||||||
|
||||||
public NodeSignatureVerifier(AddressBookService addressBookService, | ||||||
CommonDownloaderProperties commonDownloaderProperties, | ||||||
MeterRegistry meterRegistry) { | ||||||
this.addressBookService = addressBookService; | ||||||
this.commonDownloaderProperties = commonDownloaderProperties; | ||||||
this.meterRegistry = meterRegistry; | ||||||
|
||||||
missingNodeSignatureFileMetric = Counter.builder("hedera.mirror.download.signature.missing") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it might make sense to just have a single metric for both. We can add a
Right now there's a problem that if a node is missing the dividend of that equation is decreasing, skewing the accuracy of that calculation. We need to ensure we always have the total be equal to the number of nodes in the address book. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Combined into one counter |
||||||
.description("The number of nodes whose signatures are missing from the consensus process"); | ||||||
signatureVerificationMetric = Counter.builder("hedera.mirror.download.signature.verification") | ||||||
.description("The number of signatures verified from a particular node"); | ||||||
} | ||||||
|
||||||
private static boolean canReachConsensus(long actualNodes, long expectedNodes) { | ||||||
return actualNodes >= Math.ceil(expectedNodes / 3.0); | ||||||
private boolean canReachConsensus(long actualNodes, long expectedNodes) { | ||||||
return actualNodes >= Math.ceil(expectedNodes * commonDownloaderProperties.getConsensusRatio()); | ||||||
} | ||||||
|
||||||
/** | ||||||
|
@@ -76,9 +100,13 @@ public void verify(Collection<FileStreamSignature> signatures) throws SignatureV | |||||
long sigFileCount = signatures.size(); | ||||||
long nodeCount = nodeAccountIDPubKeyMap.size(); | ||||||
if (!canReachConsensus(sigFileCount, nodeCount)) { | ||||||
throw new SignatureVerificationException("Require at least 1/3 signature files to reach consensus, got " + | ||||||
sigFileCount + " out of " + nodeCount + " for file " + filename + ": " + statusMap(signatures, | ||||||
nodeAccountIDPubKeyMap)); | ||||||
throw new SignatureVerificationException(String.format( | ||||||
"Requires at least %s of signature files to reach consensus, got %d out of %d for file %s: %s", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Update troubleshooting doc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like that file and the alert based on it has been out of date for ages. It's still looking for logs in the form “File could not be verified by at least 2/3 of nodes” There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Gonna modify this to start with "Insufficient downloaded signature file count, requires at least..." to make ti easier to alert on and understand. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How about "Unable to reach consensus"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Didn't see this before my last push. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are you string formatting twice?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed |
||||||
String.format("%.03f", commonDownloaderProperties.getConsensusRatio()), | ||||||
sigFileCount, | ||||||
nodeCount, | ||||||
filename, | ||||||
statusMap(signatures, nodeAccountIDPubKeyMap))); | ||||||
} | ||||||
|
||||||
for (FileStreamSignature fileStreamSignature : signatures) { | ||||||
|
@@ -104,6 +132,9 @@ public void verify(Collection<FileStreamSignature> signatures) throws SignatureV | |||||
log.warn("Verified signature file {} reached consensus but with some errors: {}", filename, | ||||||
statusMap(signatures, nodeAccountIDPubKeyMap)); | ||||||
return; | ||||||
} else if (commonDownloaderProperties.getConsensusRatio() == 0) { | ||||||
log.debug("Signature file {} does not require consensus, skipping verification", filename); | ||||||
return; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this will pass when no signature file is verified, i.e., the possible effect is an illegitimate stream file can pass verification and get ingested There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point. You're highlighting the fact that even if consensusRatio is set to 0 we should still only proceed with no error if there are some valid signature stream files. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we probably should not log it at all unless it's important. the updated if block is dead code, since when There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right, I thought this was the scenario you highlighted in your original comment, no. Probably better to move this check up to after verification but before consensus calculation There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed |
||||||
} | ||||||
|
||||||
throw new SignatureVerificationException("Signature verification failed for file " + filename + ": " + statusMap(signatures, nodeAccountIDPubKeyMap)); | ||||||
|
@@ -153,17 +184,45 @@ private boolean verifySignature(FileStreamSignature fileStreamSignature, | |||||
return false; | ||||||
} | ||||||
|
||||||
private Map<String, Collection<String>> statusMap(Collection<FileStreamSignature> signatures, Map<String, | ||||||
private Map<String, Collection<EntityId>> statusMap(Collection<FileStreamSignature> signatures, Map<String, | ||||||
PublicKey> nodeAccountIDPubKeyMap) { | ||||||
Map<String, Collection<String>> statusMap = signatures.stream() | ||||||
Map<String, Collection<EntityId>> statusMap = signatures.stream() | ||||||
.collect(Collectors.groupingBy(fss -> fss.getStatus().toString(), | ||||||
Collectors.mapping(FileStreamSignature::getNodeAccountIdString, Collectors | ||||||
Collectors.mapping(FileStreamSignature::getNodeAccountId, Collectors | ||||||
.toCollection(TreeSet::new)))); | ||||||
Set<String> seenNodes = signatures.stream().map(FileStreamSignature::getNodeAccountIdString) | ||||||
.collect(Collectors.toSet()); | ||||||
Set<String> missingNodes = new TreeSet<>(Sets.difference(nodeAccountIDPubKeyMap.keySet(), seenNodes)); | ||||||
|
||||||
Set<EntityId> seenNodes = new HashSet<>(); | ||||||
signatures.forEach(signature -> { | ||||||
seenNodes.add(signature.getNodeAccountId()); | ||||||
EntityId nodeAccountId = signature.getNodeAccountId(); | ||||||
|
||||||
metricMap.computeIfAbsent(nodeAccountId, n -> signatureVerificationMetric | ||||||
.tag("nodeAccount", nodeAccountId.getEntityNum().toString()) | ||||||
.tag("realm", nodeAccountId.getRealmNum().toString()) | ||||||
.tag("shard", nodeAccountId.getShardNum().toString()) | ||||||
.tag("status", signature.getStatus().toString()) | ||||||
.tag("type", signature.getStreamType().toString()) | ||||||
.register(meterRegistry)) | ||||||
.increment(); | ||||||
}); | ||||||
|
||||||
Set<EntityId> missingNodes = new TreeSet<>(Sets.difference( | ||||||
nodeAccountIDPubKeyMap.keySet().stream().map(x -> EntityId.of(x, EntityTypeEnum.ACCOUNT)) | ||||||
.collect(Collectors.toSet()), | ||||||
seenNodes)); | ||||||
statusMap.put("MISSING", missingNodes); | ||||||
statusMap.remove(SignatureStatus.CONSENSUS_REACHED.toString()); | ||||||
|
||||||
String streamType = signatures.stream().map(FileStreamSignature::getStreamType).findFirst().toString(); | ||||||
missingNodes.forEach(nodeAccountId -> metricMap | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Two different counters can't be stored in the same map. As it is now, only the first counter is being created and then incremented for both. You will need two separate maps. Or if we combine metrics can ignore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Combined into one counter needing only one map |
||||||
.computeIfAbsent(nodeAccountId, n -> missingNodeSignatureFileMetric | ||||||
.tag("nodeAccount", n.getEntityNum().toString()) | ||||||
.tag("realm", nodeAccountId.getRealmNum().toString()) | ||||||
.tag("shard", nodeAccountId.getShardNum().toString()) | ||||||
.tag("type", streamType) | ||||||
.register(meterRegistry)) | ||||||
.increment()); | ||||||
|
||||||
return statusMap; | ||||||
} | ||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need to store these as fields since their construction happens so rarely and it's better to encapsulate the metric creation in a single method. See PublishMetrics for an example.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adopted similar logic