Skip to content

Commit

Permalink
Fixes #133 (#135)
Browse files: browse the repository at this point in the history
* Fixes #133

* Use locks (also fix locking in S3)
  • Loading branch information
Enkidu93 authored Nov 10, 2023
1 parent b9b9eba commit 4fbe2af
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 3 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -121,7 +121,7 @@ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, st
builder.Services
.AddHttpClient("ClearML-NoRetry")
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString));

builder.Services.AddSingleton<ClearMLHealthCheck>();
builder.Services.AddHealthChecks().AddCheck<ClearMLHealthCheck>("ClearML Health Check");

return builder;
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,8 @@ public class ClearMLHealthCheck : IHealthCheck
private readonly HttpClient _httpClient;
private readonly IOptionsMonitor<ClearMLOptions> _options;
private readonly IClearMLAuthenticationService _clearMLAuthenticationService;
private int _numConsecutiveFailures;
private readonly AsyncLock _lock;

public ClearMLHealthCheck(
IClearMLAuthenticationService clearMLAuthenticationService,
Expand All @@ -13,6 +15,8 @@ IOptionsMonitor<ClearMLOptions> options
_httpClient = httpClientFactory.CreateClient("ClearML-NoRetry");
_options = options;
_clearMLAuthenticationService = clearMLAuthenticationService;
_numConsecutiveFailures = 0;
_lock = new AsyncLock();
}

public async Task<HealthCheckResult> CheckHealthAsync(
Expand All @@ -28,11 +32,19 @@ public async Task<HealthCheckResult> CheckHealthAsync(
return HealthCheckResult.Unhealthy(
$"No ClearML agents are available for configured queue \"{_options.CurrentValue.Queue}\""
);
using (await _lock.LockAsync())
_numConsecutiveFailures = 0;
return HealthCheckResult.Healthy("ClearML is available");
}
catch (Exception e)
{
return HealthCheckResult.Unhealthy(exception: e);
using (await _lock.LockAsync())
{
_numConsecutiveFailures++;
return _numConsecutiveFailures > 3
? HealthCheckResult.Unhealthy(exception: e)
: HealthCheckResult.Degraded(exception: e);
}
}
}

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,7 +37,8 @@ public async Task<HealthCheckResult> CheckHealthAsync(
}
)
).ListObjectsV2Async(request, cancellationToken);
_numConsecutiveFailures = 0;
using (await _lock.LockAsync())
_numConsecutiveFailures = 0;
return HealthCheckResult.Healthy("The S3 bucket is available");
}
catch (Exception e)
Expand Down

0 comments on commit 4fbe2af

Please sign in to comment.