Skip to content

Commit

Permalink
KAFKA-13916; Fenced replicas should not be allowed to join the ISR in…
Browse files Browse the repository at this point in the history
… KRaft (KIP-841, Part 2) (apache#12181)

This path implements [KIP-841](https://cwiki.apache.org/confluence/display/KAFKA/KIP-841%3A+Fenced+replicas+should+not+be+allowed+to+join+the+ISR+in+KRaft). Specifically, it implements the following:
* It introduces INELIGIBLE_REPLICA and NEW_LEADER_ELECTED error codes.
* The KRaft controller validates the new ISR provided in the AlterPartition request and rejects the call if any replica in the new ISR is not eligible to join the the ISR - e.g. when fenced or shutting down. The leader reverts to the last committed ISR when its request is rejected due to this.
* The partition leader also verifies that a replica is eligible before trying to add it back to the ISR. If it is not eligible, the ISR expansion is not triggered at all.
* Updates the AlterPartition API to use topic ids. Updates the AlterPartition manger to handle topic names/ids. Updates the ZK controller and the KRaft controller to handle topic names/ids depending on the version of the request used.

Reviewers: Artem Livshits <84364232+artemlivshits@users.noreply.github.com>, José Armando García Sancio <jsancio@users.noreply.github.com>, Jason Gustafson <jason@confluent.io>
  • Loading branch information
dajac committed Jun 14, 2022
1 parent 3189a86 commit f83d95d
Show file tree
Hide file tree
Showing 32 changed files with 1,738 additions and 597 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.common.errors;

public class IneligibleReplicaException extends ApiException {
public IneligibleReplicaException(String message) {
super(message);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.common.errors;

public class NewLeaderElectedException extends ApiException {
public NewLeaderElectedException(String message) {
super(message);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.apache.kafka.common.errors.InconsistentTopicIdException;
import org.apache.kafka.common.errors.InconsistentVoterSetException;
import org.apache.kafka.common.errors.InconsistentClusterIdException;
import org.apache.kafka.common.errors.IneligibleReplicaException;
import org.apache.kafka.common.errors.InvalidCommitOffsetSizeException;
import org.apache.kafka.common.errors.InvalidConfigurationException;
import org.apache.kafka.common.errors.InvalidFetchSessionEpochException;
Expand All @@ -77,6 +78,7 @@
import org.apache.kafka.common.errors.LogDirNotFoundException;
import org.apache.kafka.common.errors.MemberIdRequiredException;
import org.apache.kafka.common.errors.NetworkException;
import org.apache.kafka.common.errors.NewLeaderElectedException;
import org.apache.kafka.common.errors.NoReassignmentInProgressException;
import org.apache.kafka.common.errors.NotControllerException;
import org.apache.kafka.common.errors.NotCoordinatorException;
Expand Down Expand Up @@ -364,7 +366,9 @@ public enum Errors {
INCONSISTENT_TOPIC_ID(103, "The log's topic ID did not match the topic ID in the request", InconsistentTopicIdException::new),
INCONSISTENT_CLUSTER_ID(104, "The clusterId in the request does not match that found on the server", InconsistentClusterIdException::new),
TRANSACTIONAL_ID_NOT_FOUND(105, "The transactionalId could not be found", TransactionalIdNotFoundException::new),
FETCH_SESSION_TOPIC_ID_ERROR(106, "The fetch session encountered inconsistent topic ID usage", FetchSessionTopicIdException::new);
FETCH_SESSION_TOPIC_ID_ERROR(106, "The fetch session encountered inconsistent topic ID usage", FetchSessionTopicIdException::new),
INELIGIBLE_REPLICA(107, "The new ISR contains at least one ineligible replica.", IneligibleReplicaException::new),
NEW_LEADER_ELECTED(108, "The AlterPartition request successfully updated the partition state but the leader has changed.", NewLeaderElectedException::new);

private static final Logger log = LoggerFactory.getLogger(Errors.class);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ public AlterPartitionRequestData data() {
@Override
public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) {
return new AlterPartitionResponse(new AlterPartitionResponseData()
.setThrottleTimeMs(throttleTimeMs)
.setErrorCode(Errors.forException(e).code()));
.setThrottleTimeMs(throttleTimeMs)
.setErrorCode(Errors.forException(e).code()));
}

public static AlterPartitionRequest parse(ByteBuffer buffer, short version) {
Expand All @@ -57,8 +57,21 @@ public static class Builder extends AbstractRequest.Builder<AlterPartitionReques

private final AlterPartitionRequestData data;

public Builder(AlterPartitionRequestData data) {
super(ApiKeys.ALTER_PARTITION);
/**
* Constructs a builder for AlterPartitionRequest.
*
* @param data The data to be sent. Note that because the version of the
* request is not known at this time, it is expected that all
* topics have a topic id and a topic name set.
* @param canUseTopicIds True if version 2 and above can be used.
*/
public Builder(AlterPartitionRequestData data, boolean canUseTopicIds) {
super(
ApiKeys.ALTER_PARTITION,
ApiKeys.ALTER_PARTITION.oldestVersion(),
// Version 1 is the maximum version that can be used without topic ids.
canUseTopicIds ? ApiKeys.ALTER_PARTITION.latestVersion() : 1
);
this.data = data;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,21 @@
"type": "request",
"listeners": ["zkBroker", "controller"],
"name": "AlterPartitionRequest",
"validVersions": "0-1",
// Version 1 adds LeaderRecoveryState field (KIP-704).
//
// Version 2 adds TopicId field to replace TopicName field (KIP-841).
"validVersions": "0-2",
"flexibleVersions": "0+",
"fields": [
{ "name": "BrokerId", "type": "int32", "versions": "0+", "entityType": "brokerId",
"about": "The ID of the requesting broker" },
{ "name": "BrokerEpoch", "type": "int64", "versions": "0+", "default": "-1",
"about": "The epoch of the requesting broker" },
{ "name": "Topics", "type": "[]TopicData", "versions": "0+", "fields": [
{ "name": "Name", "type": "string", "versions": "0+", "entityType": "topicName",
{ "name": "TopicName", "type": "string", "versions": "0-1", "ignorable": true, "entityType": "topicName",
"about": "The name of the topic to alter ISRs for" },
{ "name": "TopicId", "type": "uuid", "versions": "2+", "ignorable": true,
"about": "The ID of the topic to alter ISRs for" },
{ "name": "Partitions", "type": "[]PartitionData", "versions": "0+", "fields": [
{ "name": "PartitionIndex", "type": "int32", "versions": "0+",
"about": "The partition index" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,22 @@
"apiKey": 56,
"type": "response",
"name": "AlterPartitionResponse",
"validVersions": "0-1",
// Version 1 adds LeaderRecoveryState field (KIP-704).
//
// Version 2 adds TopicId field to replace TopicName field, can return the following new errors:
// INELIGIBLE_REPLICA, NEW_LEADER_ELECTED and UNKNOWN_TOPIC_ID (KIP-841).
"validVersions": "0-2",
"flexibleVersions": "0+",
"fields": [
{ "name": "ThrottleTimeMs", "type": "int32", "versions": "0+",
"about": "The duration in milliseconds for which the request was throttled due to a quota violation, or zero if the request did not violate any quota." },
{ "name": "ErrorCode", "type": "int16", "versions": "0+",
"about": "The top level response error code" },
{ "name": "Topics", "type": "[]TopicData", "versions": "0+", "fields": [
{ "name": "Name", "type": "string", "versions": "0+", "entityType": "topicName",
{ "name": "TopicName", "type": "string", "versions": "0-1", "ignorable": true, "entityType": "topicName",
"about": "The name of the topic" },
{ "name": "TopicId", "type": "uuid", "versions": "2+", "ignorable": true,
"about": "The ID of the topic" },
{ "name": "Partitions", "type": "[]PartitionData", "versions": "0+", "fields": [
{ "name": "PartitionIndex", "type": "int32", "versions": "0+",
"about": "The partition index" },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1321,9 +1321,10 @@ private AlterPartitionRequest createAlterPartitionRequest(short version) {
.setBrokerEpoch(123L)
.setBrokerId(1)
.setTopics(singletonList(new AlterPartitionRequestData.TopicData()
.setName("topic1")
.setTopicName("topic1")
.setTopicId(Uuid.randomUuid())
.setPartitions(singletonList(partitionData))));
return new AlterPartitionRequest.Builder(data).build(version);
return new AlterPartitionRequest.Builder(data, version >= 1).build(version);
}

private AlterPartitionResponse createAlterPartitionResponse(int version) {
Expand All @@ -1343,8 +1344,9 @@ private AlterPartitionResponse createAlterPartitionResponse(int version) {
.setErrorCode(Errors.NONE.code())
.setThrottleTimeMs(123)
.setTopics(singletonList(new AlterPartitionResponseData.TopicData()
.setName("topic1")
.setPartitions(singletonList(partitionData))));
.setTopicName("topic1")
.setTopicId(Uuid.randomUuid())
.setPartitions(singletonList(partitionData))));
return new AlterPartitionResponse(data);
}

Expand Down
Loading

0 comments on commit f83d95d

Please sign in to comment.