From 4fbe2afedb310828f6c9d48819f47380c17b958f Mon Sep 17 00:00:00 2001 From: "Eli C. Lowry" <83078660+Enkidu93@users.noreply.github.com> Date: Fri, 10 Nov 2023 14:31:03 -0500 Subject: [PATCH] Fixes #133 (#135) * Fixes #133 * Use locks (also fix locking in S3) --- .../Configuration/IMachineBuilderExtensions.cs | 2 +- .../Services/ClearMLHealthCheck.cs | 14 +++++++++++++- .../Services/S3HealthCheck.cs | 3 ++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/Machine/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/Machine/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index 26ea5f05..e5e94857 100644 --- a/src/Machine/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/Machine/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -121,7 +121,7 @@ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, st builder.Services .AddHttpClient("ClearML-NoRetry") .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString)); - + builder.Services.AddSingleton(); builder.Services.AddHealthChecks().AddCheck("ClearML Health Check"); return builder; diff --git a/src/Machine/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs b/src/Machine/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs index 649efca0..d7458214 100644 --- a/src/Machine/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs +++ b/src/Machine/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs @@ -3,6 +3,8 @@ public class ClearMLHealthCheck : IHealthCheck private readonly HttpClient _httpClient; private readonly IOptionsMonitor _options; private readonly IClearMLAuthenticationService _clearMLAuthenticationService; + private int _numConsecutiveFailures; + private readonly AsyncLock _lock; public ClearMLHealthCheck( IClearMLAuthenticationService clearMLAuthenticationService, @@ -13,6 +15,8 @@ IOptionsMonitor options _httpClient = httpClientFactory.CreateClient("ClearML-NoRetry"); _options = options; _clearMLAuthenticationService = clearMLAuthenticationService; + _numConsecutiveFailures = 0; + _lock = new AsyncLock(); } public async Task CheckHealthAsync( @@ -28,11 +32,19 @@ public async Task CheckHealthAsync( return HealthCheckResult.Unhealthy( $"No ClearML agents are available for configured queue \"{_options.CurrentValue.Queue}\"" ); + using (await _lock.LockAsync()) + _numConsecutiveFailures = 0; return HealthCheckResult.Healthy("ClearML is available"); } catch (Exception e) { - return HealthCheckResult.Unhealthy(exception: e); + using (await _lock.LockAsync()) + { + _numConsecutiveFailures++; + return _numConsecutiveFailures > 3 + ? HealthCheckResult.Unhealthy(exception: e) + : HealthCheckResult.Degraded(exception: e); + } } } diff --git a/src/Machine/src/SIL.Machine.AspNetCore/Services/S3HealthCheck.cs b/src/Machine/src/SIL.Machine.AspNetCore/Services/S3HealthCheck.cs index aa30bf04..8275c61a 100644 --- a/src/Machine/src/SIL.Machine.AspNetCore/Services/S3HealthCheck.cs +++ b/src/Machine/src/SIL.Machine.AspNetCore/Services/S3HealthCheck.cs @@ -37,7 +37,8 @@ public async Task CheckHealthAsync( } ) ).ListObjectsV2Async(request, cancellationToken); - _numConsecutiveFailures = 0; + using (await _lock.LockAsync()) + _numConsecutiveFailures = 0; return HealthCheckResult.Healthy("The S3 bucket is available"); } catch (Exception e)