Skip to content

Commit

Permalink
Ensure that default healthcheck duration is less than node timeout duration
Browse files Browse the repository at this point in the history

If the healthcheck does not pass before the node timeout period has
elapsed, the node is marked as being down. The previous default timings
guaranteed that this happened, which is distinctly suboptimal.
  • Loading branch information
shs96c committed Jul 6, 2021
1 parent 9b1f2d7 commit 0795e78
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

public class DistributorOptions {

public static final int DEFAULT_HEALTHCHECK_INTERVAL = 300;
public static final int DEFAULT_HEALTHCHECK_INTERVAL = 120;
public static final String DISTRIBUTOR_SECTION = "distributor";
static final String DEFAULT_DISTRIBUTOR_IMPLEMENTATION =
"org.openqa.selenium.grid.distributor.local.LocalDistributor";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.openqa.selenium.grid.sessionmap.config.SessionMapOptions;
import org.openqa.selenium.grid.sessionqueue.NewSessionQueue;
import org.openqa.selenium.grid.sessionqueue.config.NewSessionQueueOptions;
import org.openqa.selenium.internal.Debug;
import org.openqa.selenium.internal.Either;
import org.openqa.selenium.internal.Require;
import org.openqa.selenium.remote.SessionId;
Expand Down Expand Up @@ -249,8 +250,6 @@ private void register(NodeStatus status) {
public LocalDistributor add(Node node) {
Require.nonNull("Node", node);

LOG.info(String.format("Added node %s at %s.", node.getId(), node.getUri()));

nodes.put(node.getId(), node);
model.add(node.getStatus());

Expand All @@ -259,6 +258,12 @@ public LocalDistributor add(Node node) {
allChecks.put(node.getId(), runnableHealthCheck);
hostChecker.submit(runnableHealthCheck, healthcheckInterval, Duration.ofSeconds(30));

LOG.info(String.format(
"Added node %s at %s. Health check every %ss",
node.getId(),
node.getUri(),
healthcheckInterval.toMillis() / 1000));

bus.fire(new NodeAddedEvent(node.getId()));

return this;
Expand All @@ -268,6 +273,8 @@ private Runnable asRunnableHealthCheck(Node node) {
HealthCheck healthCheck = node.getHealthCheck();
NodeId id = node.getId();
return () -> {
LOG.log(getDebugLogLevel(), "Running health check for " + node.getId());

HealthCheck.Result result;
try {
result = healthCheck.check();
Expand All @@ -279,6 +286,9 @@ private Runnable asRunnableHealthCheck(Node node) {
Lock writeLock = lock.writeLock();
writeLock.lock();
try {
LOG.log(
getDebugLogLevel(),
String.format("Health check result for %s was %s", node.getId(), result.getAvailability()));
model.setAvailability(id, result.getAvailability());
} finally {
writeLock.unlock();
Expand Down

0 comments on commit 0795e78

Please sign in to comment.