diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 124ecbf7..e335e757 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,9 +11,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Setup .NET
- uses: actions/setup-dotnet@v3
+ uses: actions/setup-dotnet@v4
with:
dotnet-version: 8.0.x
- name: Start MongoDB
diff --git a/Serval.sln b/Serval.sln
index 6cb275c0..8188624d 100644
--- a/Serval.sln
+++ b/Serval.sln
@@ -64,6 +64,20 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{92805246-528
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{BA044B98-3136-4FDE-B90F-B0975758C07F}"
EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Machine", "Machine", "{F6142E52-4B58-4D12-980F-B07D8AA932C2}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{D808D2BE-ED26-4E60-A409-AE58F7C1CB8F}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{40C225C2-1EEF-4D1D-9D14-1CBB86C8A1CB}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.Machine.Shared", "src\Machine\src\Serval.Machine.Shared\Serval.Machine.Shared.csproj", "{090ECB69-464F-42C8-B92C-0808BE2802FA}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.Machine.EngineServer", "src\Machine\src\Serval.Machine.EngineServer\Serval.Machine.EngineServer.csproj", "{C02494FB-663E-4430-9F2D-41F1A740B271}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.Machine.JobServer", "src\Machine\src\Serval.Machine.JobServer\Serval.Machine.JobServer.csproj", "{BC766753-E560-4ADF-9923-C7A96076EA47}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Serval.Machine.Shared.Tests", "src\Machine\test\Serval.Machine.Shared.Tests\Serval.Machine.Shared.Tests.csproj", "{B0D23A55-AB09-4C2C-B309-F4BEB3BC968D}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -134,6 +148,22 @@ Global
{0E220C65-AA88-450E-AFB2-844E49060B3F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0E220C65-AA88-450E-AFB2-844E49060B3F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0E220C65-AA88-450E-AFB2-844E49060B3F}.Release|Any CPU.Build.0 = Release|Any CPU
+ {090ECB69-464F-42C8-B92C-0808BE2802FA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {090ECB69-464F-42C8-B92C-0808BE2802FA}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {090ECB69-464F-42C8-B92C-0808BE2802FA}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {090ECB69-464F-42C8-B92C-0808BE2802FA}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C02494FB-663E-4430-9F2D-41F1A740B271}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C02494FB-663E-4430-9F2D-41F1A740B271}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C02494FB-663E-4430-9F2D-41F1A740B271}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C02494FB-663E-4430-9F2D-41F1A740B271}.Release|Any CPU.Build.0 = Release|Any CPU
+ {BC766753-E560-4ADF-9923-C7A96076EA47}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {BC766753-E560-4ADF-9923-C7A96076EA47}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {BC766753-E560-4ADF-9923-C7A96076EA47}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {BC766753-E560-4ADF-9923-C7A96076EA47}.Release|Any CPU.Build.0 = Release|Any CPU
+ {B0D23A55-AB09-4C2C-B309-F4BEB3BC968D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {B0D23A55-AB09-4C2C-B309-F4BEB3BC968D}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {B0D23A55-AB09-4C2C-B309-F4BEB3BC968D}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {B0D23A55-AB09-4C2C-B309-F4BEB3BC968D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -160,6 +190,12 @@ Global
{3E753B99-7C31-42AC-B02E-012B802F58DB} = {6D20F76D-9A0E-44AC-8754-B4291C75D25B}
{92805246-5285-4F0A-9BF8-6EE4A027A41B} = {33E6965E-5A58-4C6F-882E-F17C8E88A3FF}
{BA044B98-3136-4FDE-B90F-B0975758C07F} = {33E6965E-5A58-4C6F-882E-F17C8E88A3FF}
+ {D808D2BE-ED26-4E60-A409-AE58F7C1CB8F} = {F6142E52-4B58-4D12-980F-B07D8AA932C2}
+ {40C225C2-1EEF-4D1D-9D14-1CBB86C8A1CB} = {F6142E52-4B58-4D12-980F-B07D8AA932C2}
+ {090ECB69-464F-42C8-B92C-0808BE2802FA} = {D808D2BE-ED26-4E60-A409-AE58F7C1CB8F}
+ {C02494FB-663E-4430-9F2D-41F1A740B271} = {D808D2BE-ED26-4E60-A409-AE58F7C1CB8F}
+ {BC766753-E560-4ADF-9923-C7A96076EA47} = {D808D2BE-ED26-4E60-A409-AE58F7C1CB8F}
+ {B0D23A55-AB09-4C2C-B309-F4BEB3BC968D} = {40C225C2-1EEF-4D1D-9D14-1CBB86C8A1CB}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {9F18C25E-E140-43C3-B177-D562E1628370}
diff --git a/src/Machine/src/Serval.Machine.EngineServer/Program.cs b/src/Machine/src/Serval.Machine.EngineServer/Program.cs
new file mode 100644
index 00000000..029e03df
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.EngineServer/Program.cs
@@ -0,0 +1,39 @@
+using Hangfire;
+using OpenTelemetry.Trace;
+
+var builder = WebApplication.CreateBuilder(args);
+
+// Add services to the container.
+builder
+ .Services.AddMachine(builder.Configuration)
+ .AddBuildJobService()
+ .AddMongoDataAccess()
+ .AddMongoHangfireJobClient()
+ .AddServalTranslationEngineService()
+ .AddModelCleanupService()
+ .AddMessageOutboxDeliveryService()
+ .AddClearMLService();
+
+if (builder.Environment.IsDevelopment())
+{
+ builder
+ .Services.AddOpenTelemetry()
+ .WithTracing(builder =>
+ {
+ builder
+ .AddAspNetCoreInstrumentation()
+ .AddHttpClientInstrumentation()
+ .AddGrpcClientInstrumentation()
+ .AddSource("MongoDB.Driver.Core.Extensions.DiagnosticSources")
+ .AddConsoleExporter();
+ });
+}
+
+var app = builder.Build();
+
+app.UseHttpsRedirection();
+
+app.MapServalTranslationEngineService();
+app.MapHangfireDashboard();
+
+app.Run();
diff --git a/src/Machine/src/Serval.Machine.EngineServer/Properties/launchSettings.json b/src/Machine/src/Serval.Machine.EngineServer/Properties/launchSettings.json
new file mode 100644
index 00000000..34eb2e94
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.EngineServer/Properties/launchSettings.json
@@ -0,0 +1,11 @@
+{
+ "profiles": {
+ "SIL.Machine.Serval.EngineServer": {
+ "commandName": "Project",
+ "environmentVariables": {
+ "ASPNETCORE_ENVIRONMENT": "Development"
+ },
+ "applicationUrl": "https://localhost:9000"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.EngineServer/Serval.Machine.EngineServer.csproj b/src/Machine/src/Serval.Machine.EngineServer/Serval.Machine.EngineServer.csproj
new file mode 100644
index 00000000..89f4d8e6
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.EngineServer/Serval.Machine.EngineServer.csproj
@@ -0,0 +1,34 @@
+
+
+
+ net8.0
+ enable
+ enable
+ 34e222a9-ef76-48f9-869e-338547f9bd25
+ true
+ true
+ true
+ $(NoWarn);CS1591;CS1573
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ icu.net.dll.config
+
+
+
+
diff --git a/src/Machine/src/Serval.Machine.EngineServer/appsettings.Development.json b/src/Machine/src/Serval.Machine.EngineServer/appsettings.Development.json
new file mode 100644
index 00000000..1f2a4ef6
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.EngineServer/appsettings.Development.json
@@ -0,0 +1,21 @@
+{
+ "ConnectionStrings": {
+ "Hangfire": "mongodb://localhost:27017/machine_jobs",
+ "Mongo": "mongodb://localhost:27017/machine",
+ "Serval": "https://localhost:8444"
+ },
+ "ClearML": {
+ "MaxSteps": 1000,
+ "Project": "dev"
+ },
+ "SharedFile": {
+ "Uri": "s3://aqua-ml-data/dev/"
+ },
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning",
+ "System.Net.Http.HttpClient.Default": "Warning"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.EngineServer/appsettings.Production.json b/src/Machine/src/Serval.Machine.EngineServer/appsettings.Production.json
new file mode 100644
index 00000000..1b2d3baf
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.EngineServer/appsettings.Production.json
@@ -0,0 +1,8 @@
+{
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.EngineServer/appsettings.Staging.json b/src/Machine/src/Serval.Machine.EngineServer/appsettings.Staging.json
new file mode 100644
index 00000000..1b2d3baf
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.EngineServer/appsettings.Staging.json
@@ -0,0 +1,8 @@
+{
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.EngineServer/appsettings.json b/src/Machine/src/Serval.Machine.EngineServer/appsettings.json
new file mode 100644
index 00000000..271163ff
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.EngineServer/appsettings.json
@@ -0,0 +1,43 @@
+{
+ "ConnectionStrings": {
+ "ClearML": "https://api.sil.hosted.allegro.ai"
+ },
+ "AllowedHosts": "*",
+ "Service": {
+ "ServiceId": "machine_engine"
+ },
+ "TranslationEngines": [
+ "SmtTransfer",
+ "Nmt"
+ ],
+ "BuildJob": {
+ "ClearML": [
+ {
+ "TranslationEngineType": "Nmt",
+ "ModelType": "huggingface",
+ "Queue": "jobs_backlog",
+ "DockerImage": "ghcr.io/sillsdev/machine.py:latest"
+ },
+ {
+ "TranslationEngineType": "SmtTransfer",
+ "ModelType": "thot",
+ "Queue": "cpu_only",
+ "DockerImage": "ghcr.io/sillsdev/machine.py:latest"
+ }
+ ]
+ },
+ "SmtTransferEngine": {
+ "EnginesDir": "/var/lib/machine/engines"
+ },
+ "ClearML": {
+ "BuildPollingEnabled": true
+ },
+ "MessageOutbox": {
+ "OutboxDir": "/var/lib/machine/outbox"
+ },
+ "Logging": {
+ "LogLevel": {
+ "System.Net.Http.HttpClient.Default": "Warning"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.JobServer/Program.cs b/src/Machine/src/Serval.Machine.JobServer/Program.cs
new file mode 100644
index 00000000..d78bfed8
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.JobServer/Program.cs
@@ -0,0 +1,30 @@
+using OpenTelemetry.Trace;
+
+var builder = WebApplication.CreateBuilder(args);
+
+builder
+ .Services.AddMachine(builder.Configuration)
+ .AddBuildJobService()
+ .AddMongoDataAccess()
+ .AddMongoHangfireJobClient()
+ .AddHangfireJobServer()
+ .AddServalPlatformService()
+ .AddClearMLService();
+if (builder.Environment.IsDevelopment())
+{
+ builder
+ .Services.AddOpenTelemetry()
+ .WithTracing(builder =>
+ {
+ builder
+ .AddAspNetCoreInstrumentation()
+ .AddHttpClientInstrumentation()
+ .AddGrpcClientInstrumentation()
+ .AddSource("MongoDB.Driver.Core.Extensions.DiagnosticSources")
+ .AddConsoleExporter();
+ });
+}
+
+var app = builder.Build();
+
+app.Run();
diff --git a/src/Machine/src/Serval.Machine.JobServer/Properties/launchSettings.json b/src/Machine/src/Serval.Machine.JobServer/Properties/launchSettings.json
new file mode 100644
index 00000000..f636d0c3
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.JobServer/Properties/launchSettings.json
@@ -0,0 +1,12 @@
+{
+ "profiles": {
+ "SIL.Machine.Serval.JobServer": {
+ "commandName": "Project",
+ "launchBrowser": false,
+ "environmentVariables": {
+ "ASPNETCORE_ENVIRONMENT": "Development"
+ },
+ "applicationUrl": "https://localhost:9100"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.JobServer/Serval.Machine.JobServer.csproj b/src/Machine/src/Serval.Machine.JobServer/Serval.Machine.JobServer.csproj
new file mode 100644
index 00000000..8a466b1d
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.JobServer/Serval.Machine.JobServer.csproj
@@ -0,0 +1,37 @@
+
+
+
+ net8.0
+ enable
+ enable
+ aa9e7440-5a04-4de6-ba51-bab9ef4a62e1
+ true
+ true
+ true
+ $(NoWarn);CS1591;CS1573
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ icu.net.dll.config
+
+
+
+
diff --git a/src/Machine/src/Serval.Machine.JobServer/appsettings.Development.json b/src/Machine/src/Serval.Machine.JobServer/appsettings.Development.json
new file mode 100644
index 00000000..1f2a4ef6
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.JobServer/appsettings.Development.json
@@ -0,0 +1,21 @@
+{
+ "ConnectionStrings": {
+ "Hangfire": "mongodb://localhost:27017/machine_jobs",
+ "Mongo": "mongodb://localhost:27017/machine",
+ "Serval": "https://localhost:8444"
+ },
+ "ClearML": {
+ "MaxSteps": 1000,
+ "Project": "dev"
+ },
+ "SharedFile": {
+ "Uri": "s3://aqua-ml-data/dev/"
+ },
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning",
+ "System.Net.Http.HttpClient.Default": "Warning"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.JobServer/appsettings.Production.json b/src/Machine/src/Serval.Machine.JobServer/appsettings.Production.json
new file mode 100644
index 00000000..1b2d3baf
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.JobServer/appsettings.Production.json
@@ -0,0 +1,8 @@
+{
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.JobServer/appsettings.Staging.json b/src/Machine/src/Serval.Machine.JobServer/appsettings.Staging.json
new file mode 100644
index 00000000..1b2d3baf
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.JobServer/appsettings.Staging.json
@@ -0,0 +1,8 @@
+{
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.JobServer/appsettings.json b/src/Machine/src/Serval.Machine.JobServer/appsettings.json
new file mode 100644
index 00000000..738a4f28
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.JobServer/appsettings.json
@@ -0,0 +1,43 @@
+{
+ "ConnectionStrings": {
+ "ClearML": "https://api.sil.hosted.allegro.ai"
+ },
+ "AllowedHosts": "*",
+ "Service": {
+ "ServiceId": "machine_job"
+ },
+ "TranslationEngines": [
+ "SmtTransfer",
+ "Nmt"
+ ],
+ "BuildJob": {
+ "ClearML": [
+ {
+ "TranslationEngineType": "Nmt",
+ "ModelType": "huggingface",
+ "Queue": "jobs_backlog",
+ "DockerImage": "ghcr.io/sillsdev/machine.py:latest"
+ },
+ {
+ "TranslationEngineType": "SmtTransfer",
+ "ModelType": "thot",
+ "Queue": "jobs_backlog",
+ "DockerImage": "ghcr.io/sillsdev/machine.py:latest"
+ }
+ ]
+ },
+ "SmtTransferEngine": {
+ "EnginesDir": "/var/lib/machine/engines"
+ },
+ "ClearML": {
+ "BuildPollingEnabled": false
+ },
+ "MessageOutbox": {
+ "OutboxDir": "/var/lib/machine/outbox"
+ },
+ "Logging": {
+ "LogLevel": {
+ "System.Net.Http.HttpClient.Default": "Warning"
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/BuildJobOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/BuildJobOptions.cs
new file mode 100644
index 00000000..547a9dbd
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/BuildJobOptions.cs
@@ -0,0 +1,8 @@
+namespace Serval.Machine.Shared.Configuration;
+
+public class BuildJobOptions
+{
+ public const string Key = "BuildJob";
+
+ public IList ClearML { get; set; } = new List();
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLBuildQueue.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLBuildQueue.cs
new file mode 100644
index 00000000..53e25245
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLBuildQueue.cs
@@ -0,0 +1,9 @@
+namespace Serval.Machine.Shared.Configuration;
+
+public class ClearMLBuildQueue
+{
+ public TranslationEngineType TranslationEngineType { get; set; }
+ public string ModelType { get; set; } = "";
+ public string Queue { get; set; } = "default";
+ public string DockerImage { get; set; } = "";
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLOptions.cs
new file mode 100644
index 00000000..e72b7dec
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/ClearMLOptions.cs
@@ -0,0 +1,13 @@
+namespace Serval.Machine.Shared.Configuration;
+
+public class ClearMLOptions
+{
+ public const string Key = "ClearML";
+
+ public string AccessKey { get; set; } = "";
+ public string SecretKey { get; set; } = "";
+ public bool BuildPollingEnabled { get; set; } = false;
+ public TimeSpan BuildPollingTimeout { get; set; } = TimeSpan.FromSeconds(10);
+ public string RootProject { get; set; } = "Machine";
+ public string Project { get; set; } = "dev";
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IEndpointRouteBuilderExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IEndpointRouteBuilderExtensions.cs
new file mode 100644
index 00000000..694dd67e
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IEndpointRouteBuilderExtensions.cs
@@ -0,0 +1,11 @@
+namespace Microsoft.AspNetCore.Builder;
+
+public static class IEndpointRouteBuilderExtensions
+{
+ public static IEndpointRouteBuilder MapServalTranslationEngineService(this IEndpointRouteBuilder builder)
+ {
+ builder.MapGrpcService();
+
+ return builder;
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilder.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilder.cs
new file mode 100644
index 00000000..f8dfbcd5
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilder.cs
@@ -0,0 +1,7 @@
+namespace Microsoft.Extensions.DependencyInjection;
+
+public interface IMachineBuilder
+{
+ IServiceCollection Services { get; }
+ IConfiguration? Configuration { get; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs
new file mode 100644
index 00000000..567a073e
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IMachineBuilderExtensions.cs
@@ -0,0 +1,437 @@
+using Serval.Translation.V1;
+
+namespace Microsoft.Extensions.DependencyInjection;
+
+public static class IMachineBuilderExtensions
+{
+ public static IMachineBuilder AddServiceOptions(
+ this IMachineBuilder builder,
+ Action configureOptions
+ )
+ {
+ builder.Services.Configure(configureOptions);
+ return builder;
+ }
+
+ public static IMachineBuilder AddServiceOptions(this IMachineBuilder builder, IConfiguration config)
+ {
+ builder.Services.Configure(config);
+ return builder;
+ }
+
+ public static IMachineBuilder AddSmtTransferEngineOptions(
+ this IMachineBuilder builder,
+ Action configureOptions
+ )
+ {
+ builder.Services.Configure(configureOptions);
+ return builder;
+ }
+
+ public static IMachineBuilder AddSmtTransferEngineOptions(this IMachineBuilder builder, IConfiguration config)
+ {
+ builder.Services.Configure(config);
+ return builder;
+ }
+
+ public static IMachineBuilder AddClearMLOptions(
+ this IMachineBuilder builder,
+ Action configureOptions
+ )
+ {
+ builder.Services.Configure(configureOptions);
+ return builder;
+ }
+
+ public static IMachineBuilder AddClearMLOptions(this IMachineBuilder builder, IConfiguration config)
+ {
+ builder.Services.Configure(config);
+ return builder;
+ }
+
+ public static IMachineBuilder AddMessageOutboxOptions(
+ this IMachineBuilder builder,
+ Action configureOptions
+ )
+ {
+ builder.Services.Configure(configureOptions);
+ return builder;
+ }
+
+ public static IMachineBuilder AddMessageOutboxOptions(this IMachineBuilder builder, IConfiguration config)
+ {
+ builder.Services.Configure(config);
+ return builder;
+ }
+
+ public static IMachineBuilder AddSharedFileOptions(
+ this IMachineBuilder builder,
+ Action configureOptions
+ )
+ {
+ builder.Services.Configure(configureOptions);
+ return builder;
+ }
+
+ public static IMachineBuilder AddSharedFileOptions(this IMachineBuilder builder, IConfiguration config)
+ {
+ builder.Services.Configure(config);
+ return builder;
+ }
+
+ public static IMachineBuilder AddBuildJobOptions(
+ this IMachineBuilder builder,
+ Action configureOptions
+ )
+ {
+ builder.Services.Configure(configureOptions);
+ return builder;
+ }
+
+ public static IMachineBuilder AddBuildJobOptions(this IMachineBuilder builder, IConfiguration config)
+ {
+ builder.Services.Configure(config);
+ return builder;
+ }
+
+ public static IMachineBuilder AddThotSmtModel(this IMachineBuilder builder)
+ {
+ if (builder.Configuration is null)
+ return builder.AddThotSmtModel(o => { });
+ else
+ return builder.AddThotSmtModel(builder.Configuration.GetSection(ThotSmtModelOptions.Key));
+ }
+
+ public static IMachineBuilder AddThotSmtModel(
+ this IMachineBuilder builder,
+ Action configureOptions
+ )
+ {
+ builder.Services.Configure(configureOptions);
+ builder.Services.AddSingleton();
+ return builder;
+ }
+
+ public static IMachineBuilder AddThotSmtModel(this IMachineBuilder builder, IConfiguration config)
+ {
+ builder.Services.Configure(config);
+ builder.Services.AddSingleton();
+ return builder;
+ }
+
+ public static IMachineBuilder AddTransferEngine(this IMachineBuilder builder)
+ {
+ builder.Services.AddSingleton();
+ return builder;
+ }
+
+ public static IMachineBuilder AddUnigramTruecaser(this IMachineBuilder builder)
+ {
+ builder.Services.AddSingleton();
+ return builder;
+ }
+
+ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, string? connectionString = null)
+ {
+ connectionString ??= builder.Configuration?.GetConnectionString("ClearML");
+ if (connectionString is null)
+ throw new InvalidOperationException("ClearML connection string is required");
+
+ builder
+ .Services.AddHttpClient("ClearML")
+ .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!))
+ // Add retry policy; fail after approx. 2 + 4 + 8 = 14 seconds
+ .AddTransientHttpErrorPolicy(b =>
+ b.WaitAndRetryAsync(3, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)))
+ );
+
+ builder.Services.AddSingleton();
+
+ // workaround register satisfying the interface and as a hosted service.
+ builder.Services.AddSingleton();
+ builder.Services.AddHostedService(p => p.GetRequiredService());
+
+ builder
+ .Services.AddHttpClient("ClearML-NoRetry")
+ .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!));
+ builder.Services.AddSingleton();
+
+ builder.Services.AddHealthChecks().AddCheck("ClearML Health Check");
+
+ return builder;
+ }
+
+ private static MongoStorageOptions GetMongoStorageOptions()
+ {
+ var mongoStorageOptions = new MongoStorageOptions
+ {
+ MigrationOptions = new MongoMigrationOptions
+ {
+ MigrationStrategy = new MigrateMongoMigrationStrategy(),
+ BackupStrategy = new CollectionMongoBackupStrategy()
+ },
+ CheckConnection = true,
+ CheckQueuedJobsStrategy = CheckQueuedJobsStrategy.TailNotificationsCollection,
+ };
+ return mongoStorageOptions;
+ }
+
+ public static IMachineBuilder AddMongoHangfireJobClient(
+ this IMachineBuilder builder,
+ string? connectionString = null
+ )
+ {
+ connectionString ??= builder.Configuration?.GetConnectionString("Hangfire");
+ if (connectionString is null)
+ throw new InvalidOperationException("Hangfire connection string is required");
+
+ builder.Services.AddHangfire(c =>
+ c.SetDataCompatibilityLevel(CompatibilityLevel.Version_170)
+ .UseSimpleAssemblyNameTypeSerializer()
+ .UseRecommendedSerializerSettings()
+ .UseMongoStorage(connectionString, GetMongoStorageOptions())
+ .UseFilter(new AutomaticRetryAttribute { Attempts = 0 })
+ );
+ builder.Services.AddHealthChecks().AddCheck(name: "Hangfire");
+ return builder;
+ }
+
+ public static IMachineBuilder AddHangfireJobServer(
+ this IMachineBuilder builder,
+ IEnumerable? engineTypes = null
+ )
+ {
+ engineTypes ??=
+ builder.Configuration?.GetSection("TranslationEngines").Get()
+ ?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt];
+ var queues = new List();
+ foreach (TranslationEngineType engineType in engineTypes.Distinct())
+ {
+ switch (engineType)
+ {
+ case TranslationEngineType.SmtTransfer:
+ builder.Services.AddSingleton();
+ builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser();
+ queues.Add("smt_transfer");
+ break;
+ case TranslationEngineType.Nmt:
+ queues.Add("nmt");
+ break;
+ }
+ }
+
+ builder.Services.AddHangfireServer(o =>
+ {
+ o.Queues = queues.ToArray();
+ });
+ return builder;
+ }
+
+ public static IMachineBuilder AddMemoryDataAccess(this IMachineBuilder builder)
+ {
+ builder.Services.AddMemoryDataAccess(o =>
+ {
+ o.AddRepository();
+ o.AddRepository();
+ o.AddRepository();
+ o.AddRepository();
+ o.AddRepository();
+ });
+
+ return builder;
+ }
+
+ public static IMachineBuilder AddMongoDataAccess(this IMachineBuilder builder, string? connectionString = null)
+ {
+ connectionString ??= builder.Configuration?.GetConnectionString("Mongo");
+ if (connectionString is null)
+ throw new InvalidOperationException("Mongo connection string is required");
+ builder.Services.AddMongoDataAccess(
+ connectionString!,
+ "Serval.Machine.Shared.Models",
+ o =>
+ {
+ o.AddRepository(
+ "translation_engines",
+ mapSetup: m => m.SetIgnoreExtraElements(true),
+ init: async c =>
+ {
+ await c.Indexes.CreateOrUpdateAsync(
+ new CreateIndexModel(
+ Builders
+ .IndexKeys.Ascending(e => e.EngineId)
+ .Ascending("currentBuild._id")
+ )
+ );
+ await c.Indexes.CreateOrUpdateAsync(
+ new CreateIndexModel(
+ Builders.IndexKeys.Ascending(e => e.CurrentBuild!.BuildJobRunner)
+ )
+ );
+ }
+ );
+ o.AddRepository("locks");
+ o.AddRepository(
+ "train_segment_pairs",
+ init: c =>
+ c.Indexes.CreateOrUpdateAsync(
+ new CreateIndexModel(
+ Builders.IndexKeys.Ascending(p => p.TranslationEngineRef)
+ )
+ )
+ );
+ o.AddRepository(
+ "outbox_messages",
+ mapSetup: m => m.MapProperty(m => m.OutboxRef).SetSerializer(new StringSerializer())
+ );
+ o.AddRepository(
+ "outboxes",
+ mapSetup: m => m.MapIdProperty(o => o.Id).SetSerializer(new StringSerializer())
+ );
+ }
+ );
+ builder.Services.AddHealthChecks().AddMongoDb(connectionString!, name: "Mongo");
+
+ return builder;
+ }
+
+ public static IMachineBuilder AddServalPlatformService(
+ this IMachineBuilder builder,
+ string? connectionString = null
+ )
+ {
+ connectionString ??= builder.Configuration?.GetConnectionString("Serval");
+ if (connectionString is null)
+ throw new InvalidOperationException("Serval connection string is required");
+
+ builder.Services.AddScoped();
+
+ builder.Services.AddSingleton();
+
+ builder.Services.AddScoped();
+
+ builder
+ .Services.AddGrpcClient(o =>
+ {
+ o.Address = new Uri(connectionString);
+ })
+ .ConfigureChannel(o =>
+ {
+ o.MaxRetryAttempts = null;
+ o.ServiceConfig = new ServiceConfig
+ {
+ MethodConfigs =
+ {
+ new MethodConfig
+ {
+ Names = { MethodName.Default },
+ RetryPolicy = new Grpc.Net.Client.Configuration.RetryPolicy
+ {
+ MaxAttempts = 10,
+ InitialBackoff = TimeSpan.FromSeconds(1),
+ MaxBackoff = TimeSpan.FromSeconds(5),
+ BackoffMultiplier = 1.5,
+ RetryableStatusCodes = { StatusCode.Unavailable }
+ }
+ },
+ new MethodConfig
+ {
+ Names =
+ {
+ new MethodName
+ {
+ Service = "serval.translation.v1.TranslationPlatformApi",
+ Method = "UpdateBuildStatus"
+ }
+ }
+ },
+ }
+ };
+ });
+
+ return builder;
+ }
+
+ public static IMachineBuilder AddServalTranslationEngineService(
+ this IMachineBuilder builder,
+ string? connectionString = null,
+ IEnumerable? engineTypes = null
+ )
+ {
+ builder.Services.AddGrpc(options =>
+ {
+ options.Interceptors.Add();
+ options.Interceptors.Add();
+ });
+ builder.AddServalPlatformService(connectionString);
+
+ engineTypes ??=
+ builder.Configuration?.GetSection("TranslationEngines").Get()
+ ?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt];
+ foreach (TranslationEngineType engineType in engineTypes.Distinct())
+ {
+ switch (engineType)
+ {
+ case TranslationEngineType.SmtTransfer:
+ builder.Services.AddSingleton();
+ builder.Services.AddHostedService();
+ builder.AddThotSmtModel().AddTransferEngine().AddUnigramTruecaser();
+ builder.Services.AddScoped();
+ break;
+ case TranslationEngineType.Nmt:
+ builder.Services.AddScoped();
+ break;
+ }
+ }
+
+ return builder;
+ }
+
+ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, string? smtTransferEngineDir = null)
+ {
+ builder.Services.AddScoped();
+
+ builder.Services.AddScoped();
+ builder.Services.AddScoped();
+ builder.Services.AddScoped();
+ builder.Services.AddSingleton();
+ builder.Services.AddSingleton(x => x.GetRequiredService());
+ builder.Services.AddHostedService(p => p.GetRequiredService());
+
+ builder.Services.AddScoped();
+ builder.Services.AddScoped();
+ builder.Services.AddScoped();
+
+ if (smtTransferEngineDir is null)
+ {
+ var smtTransferEngineOptions = new SmtTransferEngineOptions();
+ builder.Configuration?.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions);
+ smtTransferEngineDir = smtTransferEngineOptions.EnginesDir;
+ }
+ string? driveLetter = Path.GetPathRoot(smtTransferEngineDir)?[..1];
+ if (driveLetter is null)
+ throw new InvalidOperationException("SMT Engine directory is required");
+ // add health check for disk storage capacity
+ builder
+ .Services.AddHealthChecks()
+ .AddDiskStorageHealthCheck(
+ x => x.AddDrive(driveLetter, 1_000), // 1GB
+ "SMT Engine Storage Capacity",
+ HealthStatus.Degraded
+ );
+
+ return builder;
+ }
+
+ public static IMachineBuilder AddModelCleanupService(this IMachineBuilder builder)
+ {
+ builder.Services.AddHostedService();
+ return builder;
+ }
+
+ public static IMachineBuilder AddMessageOutboxDeliveryService(this IMachineBuilder builder)
+ {
+ builder.Services.AddHostedService();
+ return builder;
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs
new file mode 100644
index 00000000..7463e6ac
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/IServiceCollectionExtensions.cs
@@ -0,0 +1,54 @@
+namespace Microsoft.Extensions.DependencyInjection;
+
+public static class IServiceCollectionExtensions
+{
+ public static IMachineBuilder AddMachine(this IServiceCollection services, IConfiguration? configuration = null)
+ {
+ if (!Sldr.IsInitialized)
+ Sldr.Initialize();
+
+ services.AddSingleton();
+ services.AddSingleton();
+ services.AddHealthChecks().AddCheck("S3 Bucket");
+
+ services.AddSingleton();
+ services.AddTransient();
+
+ services.AddScoped();
+ services.AddSingleton();
+ services.AddStartupTask(
+ (sp, cancellationToken) =>
+ sp.GetRequiredService().InitAsync(cancellationToken)
+ );
+
+ var builder = new MachineBuilder(services, configuration);
+ if (configuration is null)
+ {
+ builder.AddServiceOptions(o => { });
+ builder.AddSharedFileOptions(o => { });
+ builder.AddSmtTransferEngineOptions(o => { });
+ builder.AddClearMLOptions(o => { });
+ builder.AddBuildJobOptions(o => { });
+ builder.AddMessageOutboxOptions(o => { });
+ }
+ else
+ {
+ builder.AddServiceOptions(configuration.GetSection(ServiceOptions.Key));
+ builder.AddSharedFileOptions(configuration.GetSection(SharedFileOptions.Key));
+ builder.AddSmtTransferEngineOptions(configuration.GetSection(SmtTransferEngineOptions.Key));
+ builder.AddClearMLOptions(configuration.GetSection(ClearMLOptions.Key));
+ builder.AddBuildJobOptions(configuration.GetSection(BuildJobOptions.Key));
+ builder.AddMessageOutboxOptions(configuration.GetSection(MessageOutboxOptions.Key));
+ }
+ return builder;
+ }
+
+ public static IServiceCollection AddStartupTask(
+ this IServiceCollection services,
+ Func startupTask
+ )
+ {
+ services.AddHostedService(sp => new StartupTask(sp, startupTask));
+ return services;
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/MachineBuilder.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/MachineBuilder.cs
new file mode 100644
index 00000000..58ddf5c1
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/MachineBuilder.cs
@@ -0,0 +1,7 @@
+namespace Microsoft.Extensions.DependencyInjection;
+
+internal class MachineBuilder(IServiceCollection services, IConfiguration? configuration) : IMachineBuilder
+{
+ public IServiceCollection Services { get; } = services;
+ public IConfiguration? Configuration { get; } = configuration;
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/MessageOutboxOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/MessageOutboxOptions.cs
new file mode 100644
index 00000000..e2e88feb
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/MessageOutboxOptions.cs
@@ -0,0 +1,9 @@
+namespace Serval.Machine.Shared.Configuration;
+
+public class MessageOutboxOptions
+{
+ public const string Key = "MessageOutbox";
+
+ public string OutboxDir { get; set; } = "outbox";
+ public TimeSpan MessageExpirationTimeout { get; set; } = TimeSpan.FromHours(48);
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/ServiceOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/ServiceOptions.cs
new file mode 100644
index 00000000..8011e7b7
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/ServiceOptions.cs
@@ -0,0 +1,8 @@
+namespace Serval.Machine.Shared.Configuration;
+
+public class ServiceOptions
+{
+ public const string Key = "Service";
+
+ public string ServiceId { get; set; } = "machine_api";
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/SharedFileOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/SharedFileOptions.cs
new file mode 100644
index 00000000..4ae27e1e
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/SharedFileOptions.cs
@@ -0,0 +1,11 @@
+namespace Serval.Machine.Shared.Configuration;
+
+public class SharedFileOptions
+{
+ public const string Key = "SharedFile";
+
+ public string Uri { get; set; } = "file:///var/lib/machine/";
+ public string S3AccessKeyId { get; set; } = "";
+ public string S3SecretAccessKey { get; set; } = "";
+ public string S3Region { get; set; } = "us-east-1";
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/SmtTransferEngineOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/SmtTransferEngineOptions.cs
new file mode 100644
index 00000000..15002604
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/SmtTransferEngineOptions.cs
@@ -0,0 +1,10 @@
+namespace Serval.Machine.Shared.Configuration;
+
+public class SmtTransferEngineOptions
+{
+ public const string Key = "SmtTransferEngine";
+
+ public string EnginesDir { get; set; } = "translation_engines";
+ public TimeSpan EngineCommitFrequency { get; set; } = TimeSpan.FromMinutes(5);
+ public TimeSpan InactiveEngineTimeout { get; set; } = TimeSpan.FromMinutes(10);
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Configuration/ThotSmtModelOptions.cs b/src/Machine/src/Serval.Machine.Shared/Configuration/ThotSmtModelOptions.cs
new file mode 100644
index 00000000..780eb7d2
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Configuration/ThotSmtModelOptions.cs
@@ -0,0 +1,14 @@
+namespace Serval.Machine.Shared.Configuration;
+
+public class ThotSmtModelOptions
+{
+ public const string Key = "ThotSmtModel";
+
+ public ThotSmtModelOptions()
+ {
+ string installDir = Path.GetDirectoryName(Assembly.GetEntryAssembly()!.Location)!;
+ NewModelFile = Path.Combine(installDir, "thot-new-model.zip");
+ }
+
+ public string NewModelFile { get; set; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/Build.cs b/src/Machine/src/Serval.Machine.Shared/Models/Build.cs
new file mode 100644
index 00000000..aca20540
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/Build.cs
@@ -0,0 +1,32 @@
+namespace Serval.Machine.Shared.Models;
+
+public enum BuildJobState
+{
+ None,
+ Pending,
+ Active,
+ Canceling
+}
+
+public enum BuildJobRunnerType
+{
+ Hangfire,
+ ClearML
+}
+
+public enum BuildStage
+{
+ Preprocess,
+ Train,
+ Postprocess
+}
+
+public record Build
+{
+ public required string BuildId { get; init; }
+ public required BuildJobState JobState { get; init; }
+ public required string JobId { get; init; }
+ public required BuildJobRunnerType BuildJobRunner { get; init; }
+ public required BuildStage Stage { get; init; }
+ public string? Options { get; set; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/ClearMLMetricsEvent.cs b/src/Machine/src/Serval.Machine.Shared/Models/ClearMLMetricsEvent.cs
new file mode 100644
index 00000000..5ae9fbfd
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/ClearMLMetricsEvent.cs
@@ -0,0 +1,12 @@
+namespace Serval.Machine.Shared.Models;
+
+public record ClearMLMetricsEvent
+{
+ public string? Metric { get; init; }
+ public string? Variant { get; init; }
+ public required double Value { get; init; }
+ public double? MinValue { get; init; }
+ public int? MinValueIteration { get; init; }
+ public double? MaxValue { get; init; }
+ public int? MaxValueIteration { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/ClearMLProject.cs b/src/Machine/src/Serval.Machine.Shared/Models/ClearMLProject.cs
new file mode 100644
index 00000000..c0cd0d7e
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/ClearMLProject.cs
@@ -0,0 +1,6 @@
+namespace Serval.Machine.Shared.Models;
+
+public record ClearMLProject
+{
+ public required string Id { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/ClearMLTask.cs b/src/Machine/src/Serval.Machine.Shared/Models/ClearMLTask.cs
new file mode 100644
index 00000000..5b13fdaa
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/ClearMLTask.cs
@@ -0,0 +1,35 @@
+namespace Serval.Machine.Shared.Models;
+
+public enum ClearMLTaskStatus
+{
+ Created,
+ Queued,
+ InProgress,
+ Stopped,
+ Published,
+ Publishing,
+ Closed,
+ Failed,
+ Completed,
+ Unknown
+}
+
+public record ClearMLTask
+{
+ public required string Id { get; init; }
+ public required string Name { get; init; }
+ public required ClearMLProject Project { get; init; }
+ public required ClearMLTaskStatus Status { get; init; }
+ public string? StatusReason { get; init; }
+ public string? StatusMessage { get; init; }
+ public required DateTime Created { get; init; }
+ public int? LastIteration { get; init; }
+ public int ActiveDuration { get; init; }
+ public required IReadOnlyDictionary<
+ string,
+ IReadOnlyDictionary
+ > LastMetrics { get; init; }
+
+ [JsonConverter(typeof(DictionaryStringStringConverter))]
+ public required IReadOnlyDictionary Runtime { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/Corpus.cs b/src/Machine/src/Serval.Machine.Shared/Models/Corpus.cs
new file mode 100644
index 00000000..9145e90d
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/Corpus.cs
@@ -0,0 +1,14 @@
+namespace Serval.Machine.Shared.Models;
+
+public record Corpus
+{
+ public required string Id { get; init; }
+ public required string SourceLanguage { get; init; }
+ public required string TargetLanguage { get; init; }
+ public IReadOnlyDictionary>? TrainOnChapters { get; init; }
+ public IReadOnlyDictionary>? PretranslateChapters { get; init; }
+ public required HashSet? TrainOnTextIds { get; init; }
+ public required HashSet? PretranslateTextIds { get; init; }
+ public required IReadOnlyList SourceFiles { get; init; }
+ public required IReadOnlyList TargetFiles { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/CorpusFile.cs b/src/Machine/src/Serval.Machine.Shared/Models/CorpusFile.cs
new file mode 100644
index 00000000..a84bf7f6
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/CorpusFile.cs
@@ -0,0 +1,14 @@
+namespace Serval.Machine.Shared.Models;
+
+public enum FileFormat
+{
+ Text = 0,
+ Paratext = 1
+}
+
+public record CorpusFile
+{
+ public required string Location { get; init; }
+ public required FileFormat Format { get; init; }
+ public required string TextId { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/Lock.cs b/src/Machine/src/Serval.Machine.Shared/Models/Lock.cs
new file mode 100644
index 00000000..39ceae87
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/Lock.cs
@@ -0,0 +1,8 @@
+namespace Serval.Machine.Shared.Models;
+
+public record Lock
+{
+ public required string Id { get; init; }
+ public DateTime? ExpiresAt { get; init; }
+ public required string HostId { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/ModelDownloadUrl.cs b/src/Machine/src/Serval.Machine.Shared/Models/ModelDownloadUrl.cs
new file mode 100644
index 00000000..798fe175
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/ModelDownloadUrl.cs
@@ -0,0 +1,8 @@
+namespace Serval.Machine.Shared.Models;
+
+public record ModelDownloadUrl
+{
+ public required string Url { get; init; }
+ public required int ModelRevision { get; init; }
+ public required DateTime ExpiresAt { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/Outbox.cs b/src/Machine/src/Serval.Machine.Shared/Models/Outbox.cs
new file mode 100644
index 00000000..ad9c0001
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/Outbox.cs
@@ -0,0 +1,10 @@
+namespace Serval.Machine.Shared.Models;
+
+public record Outbox : IEntity
+{
+ public string Id { get; set; } = "";
+
+ public int Revision { get; set; }
+
+ public int CurrentIndex { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/OutboxMessage.cs b/src/Machine/src/Serval.Machine.Shared/Models/OutboxMessage.cs
new file mode 100644
index 00000000..0e95e9a6
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/OutboxMessage.cs
@@ -0,0 +1,15 @@
+namespace Serval.Machine.Shared.Models;
+
+public record OutboxMessage : IEntity
+{
+ public string Id { get; set; } = "";
+ public int Revision { get; set; } = 1;
+ public required int Index { get; init; }
+ public required string OutboxRef { get; init; }
+ public required string Method { get; init; }
+ public required string GroupId { get; init; }
+ public string? Content { get; init; }
+ public required bool HasContentStream { get; init; }
+ public DateTimeOffset Created { get; init; } = DateTimeOffset.UtcNow;
+ public int Attempts { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/Pretranslation.cs b/src/Machine/src/Serval.Machine.Shared/Models/Pretranslation.cs
new file mode 100644
index 00000000..6e9807b5
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/Pretranslation.cs
@@ -0,0 +1,9 @@
+namespace Serval.Machine.Shared.Models;
+
+public record Pretranslation
+{
+ public required string CorpusId { get; init; }
+ public required string TextId { get; init; }
+ public required IReadOnlyList Refs { get; init; }
+ public required string Translation { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/RWLock.cs b/src/Machine/src/Serval.Machine.Shared/Models/RWLock.cs
new file mode 100644
index 00000000..2271aa9b
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/RWLock.cs
@@ -0,0 +1,25 @@
+namespace Serval.Machine.Shared.Models;
+
+public record RWLock : IEntity
+{
+ public string Id { get; set; } = "";
+ public int Revision { get; set; } = 1;
+ public Lock? WriterLock { get; init; }
+ public required IReadOnlyList ReaderLocks { get; init; }
+ public required IReadOnlyList WriterQueue { get; init; }
+
+ public bool IsAvailableForReading()
+ {
+ var now = DateTime.UtcNow;
+ return (WriterLock is null || WriterLock.ExpiresAt is not null && WriterLock.ExpiresAt <= now)
+ && WriterQueue.Count == 0;
+ }
+
+ public bool IsAvailableForWriting(string? lockId = null)
+ {
+ var now = DateTime.UtcNow;
+ return (WriterLock is null || WriterLock.ExpiresAt is not null && WriterLock.ExpiresAt <= now)
+ && !ReaderLocks.Any(l => l.ExpiresAt is null || l.ExpiresAt > now)
+ && (lockId is null || WriterQueue.Count > 0 && WriterQueue[0].Id == lockId);
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/TrainSegmentPair.cs b/src/Machine/src/Serval.Machine.Shared/Models/TrainSegmentPair.cs
new file mode 100644
index 00000000..30927345
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/TrainSegmentPair.cs
@@ -0,0 +1,11 @@
+namespace Serval.Machine.Shared.Models;
+
+public record TrainSegmentPair : IEntity
+{
+ public string Id { get; set; } = "";
+ public int Revision { get; set; } = 1;
+ public required string TranslationEngineRef { get; init; }
+ public required string Source { get; init; }
+ public required string Target { get; init; }
+ public required bool SentenceStart { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Models/TranslationEngine.cs b/src/Machine/src/Serval.Machine.Shared/Models/TranslationEngine.cs
new file mode 100644
index 00000000..80b1f648
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Models/TranslationEngine.cs
@@ -0,0 +1,14 @@
+namespace Serval.Machine.Shared.Models;
+
+public record TranslationEngine : IEntity
+{
+ public string Id { get; set; } = "";
+ public int Revision { get; set; } = 1;
+ public required string EngineId { get; init; }
+ public required TranslationEngineType Type { get; init; }
+ public required string SourceLanguage { get; init; }
+ public required string TargetLanguage { get; init; }
+ public required bool IsModelPersisted { get; init; }
+ public int BuildRevision { get; init; }
+ public Build? CurrentBuild { get; init; }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Properties/AssemblyInfo.cs b/src/Machine/src/Serval.Machine.Shared/Properties/AssemblyInfo.cs
new file mode 100644
index 00000000..54a4902d
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Properties/AssemblyInfo.cs
@@ -0,0 +1,2 @@
+[assembly: InternalsVisibleTo("Serval.Machine.Shared.Tests")]
+[assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")]
diff --git a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj
new file mode 100644
index 00000000..6b716479
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj
@@ -0,0 +1,60 @@
+
+
+
+ net8.0
+ An ASP.NET Core web API middleware for the Machine library.
+ enable
+ enable
+ true
+ true
+ true
+ $(NoWarn);CS1591;CS1573
+
+
+
+
+
+
+
+
+ PreserveNewest
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs b/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs
new file mode 100644
index 00000000..244aa04a
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/BuildJobService.cs
@@ -0,0 +1,211 @@
+namespace Serval.Machine.Shared.Services;
+
+public class BuildJobService(IEnumerable runners, IRepository engines)
+ : IBuildJobService
+{
+ private readonly Dictionary _runners = runners.ToDictionary(r => r.Type);
+ private readonly IRepository _engines = engines;
+
+ public Task IsEngineBuilding(string engineId, CancellationToken cancellationToken = default)
+ {
+ return _engines.ExistsAsync(e => e.EngineId == engineId && e.CurrentBuild != null, cancellationToken);
+ }
+
+ public Task> GetBuildingEnginesAsync(
+ BuildJobRunnerType runner,
+ CancellationToken cancellationToken = default
+ )
+ {
+ return _engines.GetAllAsync(
+ e => e.CurrentBuild != null && e.CurrentBuild.BuildJobRunner == runner,
+ cancellationToken
+ );
+ }
+
+ public async Task GetBuildAsync(
+ string engineId,
+ string buildId,
+ CancellationToken cancellationToken = default
+ )
+ {
+ TranslationEngine? engine = await _engines.GetAsync(
+ e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.BuildId == buildId,
+ cancellationToken
+ );
+ return engine?.CurrentBuild;
+ }
+
+ public async Task CreateEngineAsync(
+ string engineId,
+ string? name = null,
+ CancellationToken cancellationToken = default
+ )
+ {
+ foreach (BuildJobRunnerType runnerType in _runners.Keys)
+ {
+ IBuildJobRunner runner = _runners[runnerType];
+ await runner.CreateEngineAsync(engineId, name, cancellationToken);
+ }
+ }
+
+ public async Task DeleteEngineAsync(string engineId, CancellationToken cancellationToken = default)
+ {
+ foreach (BuildJobRunnerType runnerType in _runners.Keys)
+ {
+ IBuildJobRunner runner = _runners[runnerType];
+ await runner.DeleteEngineAsync(engineId, cancellationToken);
+ }
+ }
+
+ public async Task StartBuildJobAsync(
+ BuildJobRunnerType runnerType,
+ string engineId,
+ string buildId,
+ BuildStage stage,
+ object? data = null,
+ string? buildOptions = null,
+ CancellationToken cancellationToken = default
+ )
+ {
+ TranslationEngine? engine = await _engines.GetAsync(
+ e =>
+ e.EngineId == engineId
+ && (e.CurrentBuild == null || e.CurrentBuild.JobState != BuildJobState.Canceling),
+ cancellationToken
+ );
+ if (engine is null)
+ return false;
+
+ IBuildJobRunner runner = _runners[runnerType];
+ string jobId = await runner.CreateJobAsync(
+ engine.Type,
+ engineId,
+ buildId,
+ stage,
+ data,
+ buildOptions,
+ cancellationToken
+ );
+ try
+ {
+ await _engines.UpdateAsync(
+ e => e.EngineId == engineId,
+ u =>
+ u.Set(
+ e => e.CurrentBuild,
+ new Build
+ {
+ BuildId = buildId,
+ JobId = jobId,
+ BuildJobRunner = runner.Type,
+ Stage = stage,
+ JobState = BuildJobState.Pending,
+ Options = buildOptions
+ }
+ ),
+ cancellationToken: cancellationToken
+ );
+ await runner.EnqueueJobAsync(jobId, engine.Type, cancellationToken);
+ return true;
+ }
+ catch
+ {
+ await runner.DeleteJobAsync(jobId, CancellationToken.None);
+ throw;
+ }
+ }
+
+ public async Task<(string? BuildId, BuildJobState State)> CancelBuildJobAsync(
+ string engineId,
+ CancellationToken cancellationToken = default
+ )
+ {
+ TranslationEngine? engine = await _engines.GetAsync(
+ e => e.EngineId == engineId && e.CurrentBuild != null,
+ cancellationToken
+ );
+ if (engine is null || engine.CurrentBuild is null)
+ return (null, BuildJobState.None);
+
+ IBuildJobRunner runner = _runners[engine.CurrentBuild.BuildJobRunner];
+
+ if (engine.CurrentBuild.JobState is BuildJobState.Pending)
+ {
+ // cancel a job that hasn't started yet
+ engine = await _engines.UpdateAsync(
+ e => e.EngineId == engineId && e.CurrentBuild != null,
+ u => u.Unset(b => b.CurrentBuild),
+ returnOriginal: true,
+ cancellationToken: cancellationToken
+ );
+ if (engine is not null && engine.CurrentBuild is not null)
+ {
+ // job will be deleted from the queue
+ await runner.StopJobAsync(engine.CurrentBuild.JobId, CancellationToken.None);
+ return (engine.CurrentBuild.BuildId, BuildJobState.None);
+ }
+ }
+ else if (engine.CurrentBuild.JobState is BuildJobState.Active)
+ {
+ // cancel a job that is already running
+ engine = await _engines.UpdateAsync(
+ e => e.EngineId == engineId && e.CurrentBuild != null,
+ u => u.Set(e => e.CurrentBuild!.JobState, BuildJobState.Canceling),
+ cancellationToken: cancellationToken
+ );
+ if (engine is not null && engine.CurrentBuild is not null)
+ {
+ await runner.StopJobAsync(engine.CurrentBuild.JobId, CancellationToken.None);
+ return (engine.CurrentBuild.BuildId, BuildJobState.Canceling);
+ }
+ }
+
+ return (null, BuildJobState.None);
+ }
+
+ public async Task BuildJobStartedAsync(
+ string engineId,
+ string buildId,
+ CancellationToken cancellationToken = default
+ )
+ {
+ TranslationEngine? engine = await _engines.UpdateAsync(
+ e =>
+ e.EngineId == engineId
+ && e.CurrentBuild != null
+ && e.CurrentBuild.BuildId == buildId
+ && e.CurrentBuild.JobState == BuildJobState.Pending,
+ u => u.Set(e => e.CurrentBuild!.JobState, BuildJobState.Active),
+ cancellationToken: cancellationToken
+ );
+ return engine is not null;
+ }
+
+ public Task BuildJobFinishedAsync(
+ string engineId,
+ string buildId,
+ bool buildComplete,
+ CancellationToken cancellationToken = default
+ )
+ {
+ return _engines.UpdateAsync(
+ e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.BuildId == buildId,
+ u =>
+ {
+ u.Unset(e => e.CurrentBuild);
+ if (buildComplete)
+ u.Inc(e => e.BuildRevision);
+ },
+ cancellationToken: cancellationToken
+ );
+ }
+
+ public Task BuildJobRestartingAsync(string engineId, string buildId, CancellationToken cancellationToken = default)
+ {
+ return _engines.UpdateAsync(
+ e => e.EngineId == engineId && e.CurrentBuild != null && e.CurrentBuild.BuildId == buildId,
+ u => u.Set(e => e.CurrentBuild!.JobState, BuildJobState.Pending),
+ cancellationToken: cancellationToken
+ );
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/BuildProgress.cs b/src/Machine/src/Serval.Machine.Shared/Services/BuildProgress.cs
new file mode 100644
index 00000000..88422c6c
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/BuildProgress.cs
@@ -0,0 +1,25 @@
+namespace Serval.Machine.Shared.Services;
+
+public class BuildProgress(IPlatformService platformService, string buildId) : IProgress
+{
+ private readonly IPlatformService _platformService = platformService;
+ private readonly string _buildId = buildId;
+ private ProgressStatus _prevStatus;
+
+ private DateTime _lastReportTime = DateTime.Now;
+
+ private const float ThrottleTimeSeconds = 1;
+
+ public void Report(ProgressStatus value)
+ {
+ if (_prevStatus.Equals(value))
+ return;
+
+ if (DateTime.Now < _lastReportTime.AddSeconds(ThrottleTimeSeconds))
+ return;
+
+ _lastReportTime = DateTime.Now;
+ _platformService.UpdateBuildStatusAsync(_buildId, value);
+ _prevStatus = value;
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/CancellationInterceptor.cs b/src/Machine/src/Serval.Machine.Shared/Services/CancellationInterceptor.cs
new file mode 100644
index 00000000..73b06c2b
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/CancellationInterceptor.cs
@@ -0,0 +1,30 @@
+namespace Serval.Machine.Shared.Services;
+
+public class CancellationInterceptor(ILogger logger) : Interceptor
+{
+ private readonly ILogger _logger = logger;
+
+ public override async Task UnaryServerHandler(
+ TRequest request,
+ ServerCallContext context,
+ UnaryServerMethod continuation
+ )
+ {
+ try
+ {
+ return await continuation(request, context);
+ }
+ catch (Exception ex)
+ {
+ if (ex is OperationCanceledException)
+ {
+ _logger.LogInformation("An operation was canceled.");
+ throw new RpcException(new Status(StatusCode.Cancelled, "An operation was canceled."));
+ }
+ else
+ {
+ throw;
+ }
+ }
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLAuthenticationService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLAuthenticationService.cs
new file mode 100644
index 00000000..9603aeb6
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLAuthenticationService.cs
@@ -0,0 +1,77 @@
+namespace Serval.Machine.Shared.Services;
+
+public class ClearMLAuthenticationService(
+ IServiceProvider services,
+ IHttpClientFactory httpClientFactory,
+ IOptionsMonitor options,
+ ILogger logger
+) : RecurrentTask("ClearML authentication service", services, RefreshPeriod, logger), IClearMLAuthenticationService
+{
+ private readonly HttpClient _httpClient = httpClientFactory.CreateClient("ClearML");
+ private readonly IOptionsMonitor _options = options;
+ private readonly ILogger _logger = logger;
+ private readonly AsyncLock _lock = new();
+
+ // technically, the token should be good for 30 days, but let's refresh each hour
+ // to know well ahead of time if something is wrong.
+ private static readonly TimeSpan RefreshPeriod = TimeSpan.FromSeconds(3600);
+ private string _authToken = "";
+
+ public async Task GetAuthTokenAsync(CancellationToken cancellationToken = default)
+ {
+ using (await _lock.LockAsync(cancellationToken))
+ {
+ if (_authToken is "")
+ {
+ //Should only happen once, so no different in cost than previous solution
+ _logger.LogInformation("Token was empty; refreshing");
+ await AuthorizeAsync(cancellationToken);
+ }
+ }
+ return _authToken;
+ }
+
+ protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken cancellationToken)
+ {
+ try
+ {
+ using (await _lock.LockAsync(cancellationToken))
+ await AuthorizeAsync(cancellationToken);
+ }
+ catch (Exception e)
+ {
+ if (_authToken is "")
+ {
+ _logger.LogError(e, "Error occurred while acquiring ClearML authentication token for the first time.");
+ // The ClearML token never was set. We can't continue without it.
+ throw;
+ }
+ else
+ {
+ _logger.LogError(e, "Error occurred while refreshing ClearML authentication token.");
+ }
+ }
+ }
+
+ private async Task AuthorizeAsync(CancellationToken cancellationToken)
+ {
+ var request = new HttpRequestMessage(HttpMethod.Post, "auth.login")
+ {
+ Content = new StringContent("{}", Encoding.UTF8, "application/json")
+ };
+ var authenticationString = $"{_options.CurrentValue.AccessKey}:{_options.CurrentValue.SecretKey}";
+ var base64EncodedAuthenticationString = Convert.ToBase64String(Encoding.ASCII.GetBytes(authenticationString));
+ request.Headers.Add("Authorization", $"Basic {base64EncodedAuthenticationString}");
+ HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken);
+ string result = await response.Content.ReadAsStringAsync(cancellationToken);
+ string? refreshedToken = (string?)((JsonObject?)JsonNode.Parse(result))?["data"]?["token"];
+ if (refreshedToken is null || refreshedToken is "")
+ {
+ throw new InvalidOperationException(
+ $"ClearML authentication failed - {response.StatusCode}: {response.ReasonPhrase}"
+ );
+ }
+ _authToken = refreshedToken;
+ _logger.LogInformation("ClearML Authentication Token Refresh Successful.");
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLBuildJobRunner.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLBuildJobRunner.cs
new file mode 100644
index 00000000..910dd957
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLBuildJobRunner.cs
@@ -0,0 +1,88 @@
+namespace Serval.Machine.Shared.Services;
+
+public class ClearMLBuildJobRunner(
+ IClearMLService clearMLService,
+ IEnumerable buildJobFactories,
+ IOptionsMonitor options
+) : IBuildJobRunner
+{
+ private readonly IClearMLService _clearMLService = clearMLService;
+ private readonly Dictionary _buildJobFactories =
+ buildJobFactories.ToDictionary(f => f.EngineType);
+
+ private readonly Dictionary _options =
+ options.CurrentValue.ClearML.ToDictionary(o => o.TranslationEngineType);
+
+ public BuildJobRunnerType Type => BuildJobRunnerType.ClearML;
+
+ public async Task CreateEngineAsync(
+ string engineId,
+ string? name = null,
+ CancellationToken cancellationToken = default
+ )
+ {
+ await _clearMLService.CreateProjectAsync(engineId, name, cancellationToken);
+ }
+
+ public async Task DeleteEngineAsync(string engineId, CancellationToken cancellationToken = default)
+ {
+ string? projectId = await _clearMLService.GetProjectIdAsync(engineId, cancellationToken);
+ if (projectId is not null)
+ await _clearMLService.DeleteProjectAsync(projectId, cancellationToken);
+ }
+
+ public async Task CreateJobAsync(
+ TranslationEngineType engineType,
+ string engineId,
+ string buildId,
+ BuildStage stage,
+ object? data = null,
+ string? buildOptions = null,
+ CancellationToken cancellationToken = default
+ )
+ {
+ string? projectId = await _clearMLService.GetProjectIdAsync(engineId, cancellationToken);
+ projectId ??= await _clearMLService.CreateProjectAsync(engineId, cancellationToken: cancellationToken);
+
+ ClearMLTask? task = await _clearMLService.GetTaskByNameAsync(buildId, cancellationToken);
+ if (task is not null)
+ return task.Id;
+
+ IClearMLBuildJobFactory buildJobFactory = _buildJobFactories[engineType];
+ string script = await buildJobFactory.CreateJobScriptAsync(
+ engineId,
+ buildId,
+ _options[engineType].ModelType,
+ stage,
+ data,
+ buildOptions,
+ cancellationToken
+ );
+ return await _clearMLService.CreateTaskAsync(
+ buildId,
+ projectId,
+ script,
+ _options[engineType].DockerImage,
+ cancellationToken
+ );
+ }
+
+ public Task DeleteJobAsync(string jobId, CancellationToken cancellationToken = default)
+ {
+ return _clearMLService.DeleteTaskAsync(jobId, cancellationToken);
+ }
+
+ public Task EnqueueJobAsync(
+ string jobId,
+ TranslationEngineType engineType,
+ CancellationToken cancellationToken = default
+ )
+ {
+ return _clearMLService.EnqueueTaskAsync(jobId, _options[engineType].Queue, cancellationToken);
+ }
+
+ public Task StopJobAsync(string jobId, CancellationToken cancellationToken = default)
+ {
+ return _clearMLService.StopTaskAsync(jobId, cancellationToken);
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLHealthCheck.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLHealthCheck.cs
new file mode 100644
index 00000000..929b14ed
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLHealthCheck.cs
@@ -0,0 +1,100 @@
+namespace Serval.Machine.Shared.Services;
+
+public class ClearMLHealthCheck(
+ IClearMLAuthenticationService clearMLAuthenticationService,
+ IHttpClientFactory httpClientFactory,
+ IOptionsMonitor buildJobOptions
+) : IHealthCheck
+{
+ private readonly HttpClient _httpClient = httpClientFactory.CreateClient("ClearML-NoRetry");
+ private readonly IClearMLAuthenticationService _clearMLAuthenticationService = clearMLAuthenticationService;
+ private readonly ISet _queuesMonitored = buildJobOptions
+ .CurrentValue.ClearML.Select(x => x.Queue)
+ .ToHashSet();
+
+ private int _numConsecutiveFailures = 0;
+ private readonly AsyncLock _lock = new AsyncLock();
+
+ public async Task CheckHealthAsync(
+ HealthCheckContext context,
+ CancellationToken cancellationToken = default
+ )
+ {
+ try
+ {
+ if (!await PingAsync(cancellationToken))
+ return HealthCheckResult.Unhealthy("ClearML is unresponsive");
+ IReadOnlySet queuesWithoutWorkers = await QueuesWithoutWorkers(cancellationToken);
+ if (queuesWithoutWorkers.Count > 0)
+ {
+ return HealthCheckResult.Unhealthy(
+ $"No ClearML agents are available for configured queues: {string.Join(", ", queuesWithoutWorkers)}"
+ );
+ }
+
+ using (await _lock.LockAsync(cancellationToken))
+ _numConsecutiveFailures = 0;
+ return HealthCheckResult.Healthy("ClearML is available");
+ }
+ catch (Exception e)
+ {
+ using (await _lock.LockAsync(cancellationToken))
+ {
+ _numConsecutiveFailures++;
+ return _numConsecutiveFailures > 3
+ ? HealthCheckResult.Unhealthy(exception: e)
+ : HealthCheckResult.Degraded(exception: e);
+ }
+ }
+ }
+
+ private async Task CallAsync(
+ string service,
+ string action,
+ JsonNode body,
+ CancellationToken cancellationToken = default
+ )
+ {
+ var request = new HttpRequestMessage(HttpMethod.Post, $"{service}.{action}")
+ {
+ Content = new StringContent(body.ToJsonString(), Encoding.UTF8, "application/json")
+ };
+ request.Headers.Add(
+ "Authorization",
+ $"Bearer {await _clearMLAuthenticationService.GetAuthTokenAsync(cancellationToken)}"
+ );
+ HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken);
+ string result = await response.Content.ReadAsStringAsync(cancellationToken);
+ return (JsonObject?)JsonNode.Parse(result);
+ }
+
+ public async Task PingAsync(CancellationToken cancellationToken = default)
+ {
+ JsonObject? result = await CallAsync("debug", "ping", new JsonObject(), cancellationToken);
+ return result is not null;
+ }
+
+ public async Task> QueuesWithoutWorkers(CancellationToken cancellationToken = default)
+ {
+ var queuesWithoutWorkers = _queuesMonitored.ToHashSet();
+ JsonObject? result = await CallAsync("workers", "get_all", new JsonObject(), cancellationToken);
+ JsonNode? workers_node = result?["data"]?["workers"];
+ if (workers_node is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ var workers = (JsonArray)workers_node;
+ foreach (var worker in workers)
+ {
+ JsonNode? queues_node = worker?["queues"];
+ if (queues_node is null)
+ continue;
+ var queues = (JsonArray)queues_node;
+ foreach (var currentQueue in queues)
+ {
+ string? currentQueueName = (string?)currentQueue?["name"];
+ if (currentQueueName is not null)
+ queuesWithoutWorkers.Remove(currentQueueName);
+ }
+ }
+ return queuesWithoutWorkers;
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs
new file mode 100644
index 00000000..f577fdce
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLMonitorService.cs
@@ -0,0 +1,407 @@
+namespace Serval.Machine.Shared.Services;
+
+public class ClearMLMonitorService(
+ IServiceProvider services,
+ IClearMLService clearMLService,
+ ISharedFileService sharedFileService,
+ IOptionsMonitor clearMLOptions,
+ IOptionsMonitor buildJobOptions,
+ ILogger logger
+)
+ : RecurrentTask(
+ "ClearML monitor service",
+ services,
+ clearMLOptions.CurrentValue.BuildPollingTimeout,
+ logger,
+ clearMLOptions.CurrentValue.BuildPollingEnabled
+ ),
+ IClearMLQueueService
+{
+ private static readonly string SummaryMetric = CreateMD5("Summary");
+ private static readonly string TrainCorpusSizeVariant = CreateMD5("train_corpus_size");
+ private static readonly string ConfidenceVariant = CreateMD5("confidence");
+
+ private readonly IClearMLService _clearMLService = clearMLService;
+ private readonly ISharedFileService _sharedFileService = sharedFileService;
+ private readonly ILogger _logger = logger;
+ private readonly Dictionary _curBuildStatus = new();
+
+ private readonly IReadOnlyDictionary _queuePerEngineType =
+ buildJobOptions.CurrentValue.ClearML.ToDictionary(x => x.TranslationEngineType, x => x.Queue);
+
+ private readonly IDictionary _queueSizePerEngineType = new ConcurrentDictionary<
+ TranslationEngineType,
+ int
+ >(buildJobOptions.CurrentValue.ClearML.ToDictionary(x => x.TranslationEngineType, x => 0));
+
+ public int GetQueueSize(TranslationEngineType engineType)
+ {
+ return _queueSizePerEngineType[engineType];
+ }
+
+ protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken cancellationToken)
+ {
+ try
+ {
+ var buildJobService = scope.ServiceProvider.GetRequiredService();
+ IReadOnlyList trainingEngines = await buildJobService.GetBuildingEnginesAsync(
+ BuildJobRunnerType.ClearML,
+ cancellationToken
+ );
+ if (trainingEngines.Count == 0)
+ return;
+
+ Dictionary tasks = new();
+ Dictionary queuePositions = new();
+
+ foreach (TranslationEngineType engineType in _queuePerEngineType.Keys)
+ {
+ var tasksPerEngineType = (
+ await _clearMLService.GetTasksByIdAsync(
+ trainingEngines.Select(e => e.CurrentBuild!.JobId),
+ cancellationToken
+ )
+ )
+ .UnionBy(
+ await _clearMLService.GetTasksForQueueAsync(_queuePerEngineType[engineType], cancellationToken),
+ t => t.Id
+ )
+ .ToDictionary(t => t.Id);
+ // add new keys to dictionary
+ foreach (KeyValuePair kvp in tasksPerEngineType)
+ tasks.TryAdd(kvp.Key, kvp.Value);
+
+ var queuePositionsPerEngineType = tasksPerEngineType
+ .Values.Where(t => t.Status is ClearMLTaskStatus.Queued or ClearMLTaskStatus.Created)
+ .OrderBy(t => t.Created)
+ .Select((t, i) => (Position: i, Task: t))
+ .ToDictionary(e => e.Task.Name, e => e.Position);
+ // add new keys to dictionary
+ foreach (KeyValuePair kvp in queuePositionsPerEngineType)
+ queuePositions.TryAdd(kvp.Key, kvp.Value);
+
+ _queueSizePerEngineType[engineType] = queuePositionsPerEngineType.Count;
+ }
+
+ var dataAccessContext = scope.ServiceProvider.GetRequiredService();
+ var platformService = scope.ServiceProvider.GetRequiredService();
+ var lockFactory = scope.ServiceProvider.GetRequiredService();
+ foreach (TranslationEngine engine in trainingEngines)
+ {
+ if (engine.CurrentBuild is null || !tasks.TryGetValue(engine.CurrentBuild.JobId, out ClearMLTask? task))
+ continue;
+
+ if (
+ engine.CurrentBuild.JobState is BuildJobState.Pending
+ && task.Status is ClearMLTaskStatus.Queued or ClearMLTaskStatus.Created
+ )
+ {
+ await UpdateTrainJobStatus(
+ platformService,
+ engine.CurrentBuild.BuildId,
+ new ProgressStatus(step: 0, percentCompleted: 0.0),
+ //CurrentBuild.BuildId should always equal the corresponding task.Name
+ queuePositions[engine.CurrentBuild.BuildId] + 1,
+ cancellationToken
+ );
+ }
+
+ if (engine.CurrentBuild.Stage == BuildStage.Train)
+ {
+ if (
+ engine.CurrentBuild.JobState is BuildJobState.Pending
+ && task.Status
+ is ClearMLTaskStatus.InProgress
+ or ClearMLTaskStatus.Stopped
+ or ClearMLTaskStatus.Failed
+ or ClearMLTaskStatus.Completed
+ )
+ {
+ bool canceled = !await TrainJobStartedAsync(
+ dataAccessContext,
+ lockFactory,
+ buildJobService,
+ platformService,
+ engine.EngineId,
+ engine.CurrentBuild.BuildId,
+ cancellationToken
+ );
+ if (canceled)
+ continue;
+ }
+
+ switch (task.Status)
+ {
+ case ClearMLTaskStatus.InProgress:
+ {
+ double? percentCompleted = null;
+ if (task.Runtime.TryGetValue("progress", out string? progressStr))
+ percentCompleted = int.Parse(progressStr, CultureInfo.InvariantCulture) / 100.0;
+ task.Runtime.TryGetValue("message", out string? message);
+ await UpdateTrainJobStatus(
+ platformService,
+ engine.CurrentBuild.BuildId,
+ new ProgressStatus(task.LastIteration ?? 0, percentCompleted, message),
+ queueDepth: 0,
+ cancellationToken
+ );
+ break;
+ }
+
+ case ClearMLTaskStatus.Completed:
+ {
+ task.Runtime.TryGetValue("message", out string? message);
+ await UpdateTrainJobStatus(
+ platformService,
+ engine.CurrentBuild.BuildId,
+ new ProgressStatus(task.LastIteration ?? 0, percentCompleted: 1.0, message),
+ queueDepth: 0,
+ cancellationToken
+ );
+ bool canceling = !await TrainJobCompletedAsync(
+ lockFactory,
+ buildJobService,
+ engine.EngineId,
+ engine.CurrentBuild.BuildId,
+ (int)GetMetric(task, SummaryMetric, TrainCorpusSizeVariant),
+ GetMetric(task, SummaryMetric, ConfidenceVariant),
+ engine.CurrentBuild.Options,
+ cancellationToken
+ );
+ if (canceling)
+ {
+ await TrainJobCanceledAsync(
+ dataAccessContext,
+ lockFactory,
+ buildJobService,
+ platformService,
+ engine.EngineId,
+ engine.CurrentBuild.BuildId,
+ cancellationToken
+ );
+ }
+ break;
+ }
+
+ case ClearMLTaskStatus.Stopped:
+ {
+ await TrainJobCanceledAsync(
+ dataAccessContext,
+ lockFactory,
+ buildJobService,
+ platformService,
+ engine.EngineId,
+ engine.CurrentBuild.BuildId,
+ cancellationToken
+ );
+ break;
+ }
+
+ case ClearMLTaskStatus.Failed:
+ {
+ await TrainJobFaultedAsync(
+ dataAccessContext,
+ lockFactory,
+ buildJobService,
+ platformService,
+ engine.EngineId,
+ engine.CurrentBuild.BuildId,
+ $"{task.StatusReason} : {task.StatusMessage}",
+ cancellationToken
+ );
+ break;
+ }
+ }
+ }
+ }
+ }
+ catch (Exception e)
+ {
+ _logger.LogError(e, "Error occurred while monitoring ClearML tasks.");
+ }
+ }
+
+ private async Task TrainJobStartedAsync(
+ IDataAccessContext dataAccessContext,
+ IDistributedReaderWriterLockFactory lockFactory,
+ IBuildJobService buildJobService,
+ IPlatformService platformService,
+ string engineId,
+ string buildId,
+ CancellationToken cancellationToken = default
+ )
+ {
+ bool success;
+ IDistributedReaderWriterLock @lock = await lockFactory.CreateAsync(engineId, cancellationToken);
+ await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken))
+ {
+ success = await dataAccessContext.WithTransactionAsync(
+ async (ct) =>
+ {
+ if (!await buildJobService.BuildJobStartedAsync(engineId, buildId, ct))
+ return false;
+ await platformService.BuildStartedAsync(buildId, CancellationToken.None);
+ return true;
+ },
+ cancellationToken: cancellationToken
+ );
+ }
+ await UpdateTrainJobStatus(platformService, buildId, new ProgressStatus(0), 0, cancellationToken);
+ _logger.LogInformation("Build started ({BuildId})", buildId);
+ return success;
+ }
+
+ private async Task TrainJobCompletedAsync(
+ IDistributedReaderWriterLockFactory lockFactory,
+ IBuildJobService buildJobService,
+ string engineId,
+ string buildId,
+ int corpusSize,
+ double confidence,
+ string? buildOptions,
+ CancellationToken cancellationToken
+ )
+ {
+ try
+ {
+ IDistributedReaderWriterLock @lock = await lockFactory.CreateAsync(engineId, cancellationToken);
+ await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken))
+ {
+ return await buildJobService.StartBuildJobAsync(
+ BuildJobRunnerType.Hangfire,
+ engineId,
+ buildId,
+ BuildStage.Postprocess,
+ (corpusSize, confidence),
+ buildOptions,
+ cancellationToken
+ );
+ }
+ }
+ finally
+ {
+ _curBuildStatus.Remove(buildId);
+ }
+ }
+
+ private async Task TrainJobFaultedAsync(
+ IDataAccessContext dataAccessContext,
+ IDistributedReaderWriterLockFactory lockFactory,
+ IBuildJobService buildJobService,
+ IPlatformService platformService,
+ string engineId,
+ string buildId,
+ string message,
+ CancellationToken cancellationToken
+ )
+ {
+ try
+ {
+ IDistributedReaderWriterLock @lock = await lockFactory.CreateAsync(engineId, cancellationToken);
+ await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken))
+ {
+ await dataAccessContext.WithTransactionAsync(
+ async (ct) =>
+ {
+ await platformService.BuildFaultedAsync(buildId, message, ct);
+ await buildJobService.BuildJobFinishedAsync(
+ engineId,
+ buildId,
+ buildComplete: false,
+ CancellationToken.None
+ );
+ },
+ cancellationToken: cancellationToken
+ );
+ }
+ _logger.LogError("Build faulted ({BuildId}). Error: {ErrorMessage}", buildId, message);
+ }
+ finally
+ {
+ _curBuildStatus.Remove(buildId);
+ }
+ }
+
+ private async Task TrainJobCanceledAsync(
+ IDataAccessContext dataAccessContext,
+ IDistributedReaderWriterLockFactory lockFactory,
+ IBuildJobService buildJobService,
+ IPlatformService platformService,
+ string engineId,
+ string buildId,
+ CancellationToken cancellationToken
+ )
+ {
+ try
+ {
+ IDistributedReaderWriterLock @lock = await lockFactory.CreateAsync(engineId, cancellationToken);
+ await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken))
+ {
+ await dataAccessContext.WithTransactionAsync(
+ async (ct) =>
+ {
+ await platformService.BuildCanceledAsync(buildId, ct);
+ await buildJobService.BuildJobFinishedAsync(
+ engineId,
+ buildId,
+ buildComplete: false,
+ CancellationToken.None
+ );
+ },
+ cancellationToken: cancellationToken
+ );
+ }
+ _logger.LogInformation("Build canceled ({BuildId})", buildId);
+ }
+ finally
+ {
+ try
+ {
+ await _sharedFileService.DeleteAsync($"builds/{buildId}/", CancellationToken.None);
+ }
+ catch (Exception e)
+ {
+ _logger.LogWarning(e, "Unable to to delete job data for build {BuildId}.", buildId);
+ }
+ _curBuildStatus.Remove(buildId);
+ }
+ }
+
+ private async Task UpdateTrainJobStatus(
+ IPlatformService platformService,
+ string buildId,
+ ProgressStatus progressStatus,
+ int? queueDepth = null,
+ CancellationToken cancellationToken = default
+ )
+ {
+ if (
+ _curBuildStatus.TryGetValue(buildId, out ProgressStatus curProgressStatus)
+ && curProgressStatus.Equals(progressStatus)
+ )
+ {
+ return;
+ }
+ await platformService.UpdateBuildStatusAsync(buildId, progressStatus, queueDepth, cancellationToken);
+ _curBuildStatus[buildId] = progressStatus;
+ }
+
+ private static double GetMetric(ClearMLTask task, string metric, string variant)
+ {
+ if (!task.LastMetrics.TryGetValue(metric, out IReadOnlyDictionary? metricVariants))
+ return 0;
+
+ if (!metricVariants.TryGetValue(variant, out ClearMLMetricsEvent? metricEvent))
+ return 0;
+
+ return metricEvent.Value;
+ }
+
+ private static string CreateMD5(string input)
+ {
+ byte[] inputBytes = Encoding.UTF8.GetBytes(input);
+ byte[] hashBytes = MD5.HashData(inputBytes);
+
+ return Convert.ToHexString(hashBytes).ToLower();
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/ClearMLService.cs b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLService.cs
new file mode 100644
index 00000000..d3d6540c
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/ClearMLService.cs
@@ -0,0 +1,221 @@
+namespace Serval.Machine.Shared.Services;
+
+public class ClearMLService(
+ IHttpClientFactory httpClientFactory,
+ IOptionsMonitor options,
+ IClearMLAuthenticationService clearMLAuthService,
+ IHostEnvironment env
+) : IClearMLService
+{
+ private readonly HttpClient _httpClient = httpClientFactory.CreateClient("ClearML");
+ private readonly IOptionsMonitor _options = options;
+ private readonly IHostEnvironment _env = env;
+ private static readonly JsonNamingPolicy JsonNamingPolicy = new SnakeCaseJsonNamingPolicy();
+ private static readonly JsonSerializerOptions JsonSerializerOptions =
+ new()
+ {
+ PropertyNamingPolicy = JsonNamingPolicy,
+ Converters = { new CustomEnumConverterFactory(JsonNamingPolicy) }
+ };
+
+ private readonly IClearMLAuthenticationService _clearMLAuthService = clearMLAuthService;
+
+ public async Task GetProjectIdAsync(string name, CancellationToken cancellationToken = default)
+ {
+ var body = new JsonObject
+ {
+ ["name"] = $"{_options.CurrentValue.RootProject}/{_options.CurrentValue.Project}/{name}",
+ ["only_fields"] = new JsonArray("id")
+ };
+ JsonObject? result = await CallAsync("projects", "get_all", body, cancellationToken);
+ var projects = (JsonArray?)result?["data"]?["projects"];
+ if (projects is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ if (projects.Count == 0)
+ return null;
+ return (string?)projects[0]?["id"];
+ }
+
+ public async Task CreateProjectAsync(
+ string name,
+ string? description = null,
+ CancellationToken cancellationToken = default
+ )
+ {
+ var body = new JsonObject
+ {
+ ["name"] = $"{_options.CurrentValue.RootProject}/{_options.CurrentValue.Project}/{name}"
+ };
+ if (description != null)
+ body["description"] = description;
+ JsonObject? result = await CallAsync("projects", "create", body, cancellationToken);
+ var projectId = (string?)result?["data"]?["id"];
+ if (projectId is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ return projectId;
+ }
+
+ public async Task DeleteProjectAsync(string id, CancellationToken cancellationToken = default)
+ {
+ var body = new JsonObject
+ {
+ ["project"] = id,
+ ["delete_contents"] = true,
+ ["force"] = true // needed if there are tasks already in that project.
+ };
+ JsonObject? result = await CallAsync("projects", "delete", body, cancellationToken);
+ var deleted = (int?)result?["data"]?["deleted"];
+ if (deleted is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ return deleted == 1;
+ }
+
+ public async Task CreateTaskAsync(
+ string buildId,
+ string projectId,
+ string script,
+ string dockerImage,
+ CancellationToken cancellationToken = default
+ )
+ {
+ var snakeCaseEnvironment = JsonNamingPolicy.ConvertName(_env.EnvironmentName);
+ var body = new JsonObject
+ {
+ ["name"] = buildId,
+ ["project"] = projectId,
+ ["script"] = new JsonObject { ["diff"] = script },
+ ["container"] = new JsonObject
+ {
+ ["image"] = dockerImage,
+ ["arguments"] = "--env ENV_FOR_DYNACONF=" + snakeCaseEnvironment,
+ },
+ ["type"] = "training"
+ };
+ JsonObject? result = await CallAsync("tasks", "create", body, cancellationToken);
+ var taskId = (string?)result?["data"]?["id"];
+ if (taskId is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ return taskId;
+ }
+
+ public async Task DeleteTaskAsync(string id, CancellationToken cancellationToken = default)
+ {
+ var body = new JsonObject { ["task"] = id };
+ JsonObject? result = await CallAsync("tasks", "delete", body, cancellationToken);
+ var deleted = (bool?)result?["data"]?["deleted"];
+ if (deleted is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ return deleted.Value;
+ }
+
+ public async Task EnqueueTaskAsync(string id, string queue, CancellationToken cancellationToken = default)
+ {
+ var body = new JsonObject { ["task"] = id, ["queue_name"] = queue };
+ JsonObject? result = await CallAsync("tasks", "enqueue", body, cancellationToken);
+ var queued = (int?)result?["data"]?["queued"];
+ if (queued is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ return queued == 1;
+ }
+
+ public async Task DequeueTaskAsync(string id, CancellationToken cancellationToken = default)
+ {
+ var body = new JsonObject { ["task"] = id };
+ JsonObject? result = await CallAsync("tasks", "dequeue", body, cancellationToken);
+ var dequeued = (int?)result?["data"]?["dequeued"];
+ if (dequeued is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ return dequeued == 1;
+ }
+
+ public async Task StopTaskAsync(string id, CancellationToken cancellationToken = default)
+ {
+ var body = new JsonObject { ["task"] = id, ["force"] = true };
+ JsonObject? result = await CallAsync("tasks", "stop", body, cancellationToken);
+ var updated = (int?)result?["data"]?["updated"];
+ if (updated is null)
+ throw new InvalidOperationException("Malformed response from ClearML server.");
+ return updated == 1;
+ }
+
+ public async Task> GetTasksForQueueAsync(
+ string queue,
+ CancellationToken cancellationToken = default
+ )
+ {
+ var body = new JsonObject { ["name"] = queue };
+ JsonObject? result = await CallAsync("queues", "get_all_ex", body, cancellationToken);
+ var tasks = (JsonArray?)result?["data"]?["queues"]?[0]?["entries"];
+ IEnumerable taskIds = tasks?.Select(t => (string)t?["id"]!) ?? new List();
+ return await GetTasksByIdAsync(taskIds, cancellationToken);
+ }
+
+ public async Task GetTaskByNameAsync(string name, CancellationToken cancellationToken = default)
+ {
+ IReadOnlyList tasks = await GetTasksAsync(new JsonObject { ["name"] = name }, cancellationToken);
+ if (tasks.Count == 0)
+ return null;
+ return tasks[0];
+ }
+
+ public Task> GetTasksByIdAsync(
+ IEnumerable ids,
+ CancellationToken cancellationToken = default
+ )
+ {
+ return GetTasksAsync(new JsonObject { ["id"] = JsonValue.Create(ids.ToArray()) }, cancellationToken);
+ }
+
+ private async Task> GetTasksAsync(
+ JsonObject body,
+ CancellationToken cancellationToken = default
+ )
+ {
+ body["only_fields"] = new JsonArray(
+ "id",
+ "name",
+ "status",
+ "project",
+ "last_iteration",
+ "status_reason",
+ "status_message",
+ "created",
+ "active_duration",
+ "last_metrics",
+ "runtime"
+ );
+ JsonObject? result = await CallAsync("tasks", "get_all_ex", body, cancellationToken);
+ var tasks = (JsonArray?)result?["data"]?["tasks"];
+ return tasks?.Select(t => t.Deserialize(JsonSerializerOptions)!).ToArray()
+ ?? Array.Empty();
+ }
+
+ private async Task CallAsync(
+ string service,
+ string action,
+ JsonNode body,
+ CancellationToken cancellationToken = default
+ )
+ {
+ var request = new HttpRequestMessage(HttpMethod.Post, $"{service}.{action}")
+ {
+ Content = new StringContent(body.ToJsonString(), Encoding.UTF8, "application/json")
+ };
+ request.Headers.Add(
+ "Authorization",
+ $"Bearer {await _clearMLAuthService.GetAuthTokenAsync(cancellationToken)}"
+ );
+ HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken);
+ string result = await response.Content.ReadAsStringAsync(cancellationToken);
+ return (JsonObject?)JsonNode.Parse(result);
+ }
+
+ private class SnakeCaseJsonNamingPolicy : JsonNamingPolicy
+ {
+ public override string ConvertName(string name)
+ {
+ return string.Concat(name.Select((x, i) => i > 0 && char.IsUpper(x) ? "_" + x.ToString() : x.ToString()))
+ .ToLowerInvariant();
+ }
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/CorpusService.cs b/src/Machine/src/Serval.Machine.Shared/Services/CorpusService.cs
new file mode 100644
index 00000000..17d562ad
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/CorpusService.cs
@@ -0,0 +1,51 @@
+namespace Serval.Machine.Shared.Services;
+
+public class CorpusService : ICorpusService
+{
+ public IEnumerable CreateTextCorpora(IReadOnlyList files)
+ {
+ List corpora = [];
+
+ List> textFileCorpora = [];
+ foreach (CorpusFile file in files)
+ {
+ switch (file.Format)
+ {
+ case FileFormat.Text:
+ // if there are multiple texts with the same id, then add it to a new corpus or the first
+ // corpus that doesn't contain a text with that id
+ Dictionary? corpus = textFileCorpora.FirstOrDefault(c =>
+ !c.ContainsKey(file.TextId)
+ );
+ if (corpus is null)
+ {
+ corpus = [];
+ textFileCorpora.Add(corpus);
+ }
+ corpus[file.TextId] = new TextFileText(file.TextId, file.Location);
+ break;
+
+ case FileFormat.Paratext:
+ corpora.Add(new ParatextBackupTextCorpus(file.Location, includeAllText: true));
+ break;
+ }
+ }
+ foreach (Dictionary corpus in textFileCorpora)
+ corpora.Add(new DictionaryTextCorpus(corpus.Values));
+
+ return corpora;
+ }
+
+ public IEnumerable CreateTermCorpora(IReadOnlyList files)
+ {
+ foreach (CorpusFile file in files)
+ {
+ switch (file.Format)
+ {
+ case FileFormat.Paratext:
+ yield return new ParatextBackupTermsCorpus(file.Location, ["PN"]);
+ break;
+ }
+ }
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLock.cs b/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLock.cs
new file mode 100644
index 00000000..7ea8679f
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLock.cs
@@ -0,0 +1,177 @@
+namespace Serval.Machine.Shared.Services;
+
+public class DistributedReaderWriterLock(string hostId, IRepository locks, IIdGenerator idGenerator, string id)
+ : IDistributedReaderWriterLock
+{
+ private readonly string _hostId = hostId;
+ private readonly IRepository _locks = locks;
+ private readonly IIdGenerator _idGenerator = idGenerator;
+ private readonly string _id = id;
+
+ public async Task ReaderLockAsync(
+ TimeSpan? lifetime = default,
+ CancellationToken cancellationToken = default
+ )
+ {
+ string lockId = _idGenerator.GenerateId();
+ if (!await TryAcquireReaderLock(lockId, lifetime, cancellationToken))
+ {
+ using ISubscription sub = await _locks.SubscribeAsync(rwl => rwl.Id == _id, cancellationToken);
+ do
+ {
+ RWLock? rwLock = sub.Change.Entity;
+ if (rwLock is not null && !rwLock.IsAvailableForReading())
+ {
+ TimeSpan? timeout = default;
+ if (rwLock.WriterLock?.ExpiresAt is not null)
+ {
+ timeout = rwLock.WriterLock.ExpiresAt - DateTime.UtcNow;
+ if (timeout < TimeSpan.Zero)
+ timeout = TimeSpan.Zero;
+ }
+ if (timeout != TimeSpan.Zero)
+ await sub.WaitForChangeAsync(timeout, cancellationToken);
+ }
+ } while (!await TryAcquireReaderLock(lockId, lifetime, cancellationToken));
+ }
+ return new ReaderLockReleaser(this, lockId);
+ }
+
+ public async Task WriterLockAsync(
+ TimeSpan? lifetime = default,
+ CancellationToken cancellationToken = default
+ )
+ {
+ string lockId = _idGenerator.GenerateId();
+ if (!await TryAcquireWriterLock(lockId, lifetime, cancellationToken))
+ {
+ await _locks.UpdateAsync(
+ _id,
+ u => u.Add(rwl => rwl.WriterQueue, new Lock { Id = lockId, HostId = _hostId }),
+ cancellationToken: cancellationToken
+ );
+ try
+ {
+ using ISubscription sub = await _locks.SubscribeAsync(rwl => rwl.Id == _id, cancellationToken);
+ do
+ {
+ RWLock? rwLock = sub.Change.Entity;
+ if (rwLock is not null && !rwLock.IsAvailableForWriting(lockId))
+ {
+ var dateTimes = rwLock
+ .ReaderLocks.Where(l => l.ExpiresAt.HasValue)
+ .Select(l => l.ExpiresAt.GetValueOrDefault())
+ .ToList();
+ if (rwLock.WriterLock?.ExpiresAt is not null)
+ dateTimes.Add(rwLock.WriterLock.ExpiresAt.Value);
+ TimeSpan? timeout = default;
+ if (dateTimes.Count > 0)
+ {
+ timeout = dateTimes.Max() - DateTime.UtcNow;
+ if (timeout < TimeSpan.Zero)
+ timeout = TimeSpan.Zero;
+ }
+ if (timeout != TimeSpan.Zero)
+ await sub.WaitForChangeAsync(timeout, cancellationToken);
+ }
+ } while (!await TryAcquireWriterLock(lockId, lifetime, cancellationToken));
+ }
+ catch
+ {
+ await _locks.UpdateAsync(
+ _id,
+ u => u.RemoveAll(rwl => rwl.WriterQueue, l => l.Id == lockId),
+ cancellationToken: cancellationToken
+ );
+ throw;
+ }
+ }
+ return new WriterLockReleaser(this, lockId);
+ }
+
+ private async Task TryAcquireWriterLock(
+ string lockId,
+ TimeSpan? lifetime,
+ CancellationToken cancellationToken
+ )
+ {
+ var now = DateTime.UtcNow;
+ Expression> filter = rwl =>
+ rwl.Id == _id
+ && (rwl.WriterLock == null || rwl.WriterLock.ExpiresAt != null && rwl.WriterLock.ExpiresAt <= now)
+ && !rwl.ReaderLocks.Any(l => l.ExpiresAt == null || l.ExpiresAt > now)
+ && (!rwl.WriterQueue.Any() || rwl.WriterQueue[0].Id == lockId);
+ void Update(IUpdateBuilder u)
+ {
+ u.Set(
+ rwl => rwl.WriterLock,
+ new Lock
+ {
+ Id = lockId,
+ ExpiresAt = lifetime is null ? null : now + lifetime,
+ HostId = _hostId
+ }
+ );
+ u.RemoveAll(rwl => rwl.WriterQueue, l => l.Id == lockId);
+ }
+ RWLock? rwLock = await _locks.UpdateAsync(filter, Update, cancellationToken: cancellationToken);
+ return rwLock is not null;
+ }
+
+ private async Task TryAcquireReaderLock(
+ string lockId,
+ TimeSpan? lifetime,
+ CancellationToken cancellationToken
+ )
+ {
+ var now = DateTime.UtcNow;
+ Expression> filter = rwl =>
+ rwl.Id == _id
+ && (rwl.WriterLock == null || rwl.WriterLock.ExpiresAt != null && rwl.WriterLock.ExpiresAt <= now)
+ && !rwl.WriterQueue.Any();
+ void Update(IUpdateBuilder u)
+ {
+ u.Add(
+ rwl => rwl.ReaderLocks,
+ new Lock
+ {
+ Id = lockId,
+ ExpiresAt = lifetime is null ? null : now + lifetime,
+ HostId = _hostId
+ }
+ );
+ }
+
+ RWLock? rwLock = await _locks.UpdateAsync(filter, Update, cancellationToken: cancellationToken);
+ return rwLock is not null;
+ }
+
+ private class WriterLockReleaser(DistributedReaderWriterLock distributedLock, string lockId) : AsyncDisposableBase
+ {
+ private readonly DistributedReaderWriterLock _distributedLock = distributedLock;
+ private readonly string _lockId = lockId;
+
+ protected override async ValueTask DisposeAsyncCore()
+ {
+ Expression> filter = rwl =>
+ rwl.Id == _distributedLock._id && rwl.WriterLock != null && rwl.WriterLock.Id == _lockId;
+ await _distributedLock._locks.UpdateAsync(filter, u => u.Unset(rwl => rwl.WriterLock));
+ }
+ }
+
+ private class ReaderLockReleaser(DistributedReaderWriterLock distributedLock, string lockId) : AsyncDisposableBase
+ {
+ private readonly DistributedReaderWriterLock _distributedLock = distributedLock;
+ private readonly string _lockId = lockId;
+
+ protected override async ValueTask DisposeAsyncCore()
+ {
+ Expression> filter = rwl =>
+ rwl.Id == _distributedLock._id && rwl.ReaderLocks.Any(l => l.Id == _lockId);
+ await _distributedLock._locks.UpdateAsync(
+ filter,
+ u => u.RemoveAll(rwl => rwl.ReaderLocks, l => l.Id == _lockId)
+ );
+ }
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLockFactory.cs b/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLockFactory.cs
new file mode 100644
index 00000000..81810fb1
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/DistributedReaderWriterLockFactory.cs
@@ -0,0 +1,77 @@
+namespace Serval.Machine.Shared.Services;
+
+public class DistributedReaderWriterLockFactory(
+ IOptions serviceOptions,
+ IRepository locks,
+ IIdGenerator idGenerator
+) : IDistributedReaderWriterLockFactory
+{
+ private readonly ServiceOptions _serviceOptions = serviceOptions.Value;
+ private readonly IIdGenerator _idGenerator = idGenerator;
+ private readonly IRepository _locks = locks;
+
+ public async Task InitAsync(CancellationToken cancellationToken = default)
+ {
+ await RemoveAllWaitersAsync(cancellationToken);
+ await ReleaseAllWriterLocksAsync(cancellationToken);
+ await ReleaseAllReaderLocksAsync(cancellationToken);
+ }
+
+ public async Task CreateAsync(
+ string id,
+ CancellationToken cancellationToken = default
+ )
+ {
+ try
+ {
+ await _locks.InsertAsync(
+ new RWLock
+ {
+ Id = id,
+ ReaderLocks = [],
+ WriterQueue = []
+ },
+ cancellationToken
+ );
+ }
+ catch (DuplicateKeyException)
+ {
+ // the lock is already made - no new one needs to be made
+ // This is done instead of checking if it exists first to prevent race conditions.
+ }
+ return new DistributedReaderWriterLock(_serviceOptions.ServiceId, _locks, _idGenerator, id);
+ }
+
+ public async Task DeleteAsync(string id, CancellationToken cancellationToken = default)
+ {
+ RWLock? rwLock = await _locks.DeleteAsync(rwl => rwl.Id == id, cancellationToken);
+ return rwLock is not null;
+ }
+
+ private async Task ReleaseAllWriterLocksAsync(CancellationToken cancellationToken)
+ {
+ await _locks.UpdateAllAsync(
+ rwl => rwl.WriterLock != null && rwl.WriterLock.HostId == _serviceOptions.ServiceId,
+ u => u.Unset(rwl => rwl.WriterLock),
+ cancellationToken
+ );
+ }
+
+ private async Task ReleaseAllReaderLocksAsync(CancellationToken cancellationToken)
+ {
+ await _locks.UpdateAllAsync(
+ rwl => rwl.ReaderLocks.Any(l => l.HostId == _serviceOptions.ServiceId),
+ u => u.RemoveAll(rwl => rwl.ReaderLocks, l => l.HostId == _serviceOptions.ServiceId),
+ cancellationToken
+ );
+ }
+
+ private async Task RemoveAllWaitersAsync(CancellationToken cancellationToken)
+ {
+ await _locks.UpdateAllAsync(
+ rwl => rwl.WriterQueue.Any(l => l.HostId == _serviceOptions.ServiceId),
+ u => u.RemoveAll(rwl => rwl.WriterQueue, l => l.HostId == _serviceOptions.ServiceId),
+ cancellationToken
+ );
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/FileSystem.cs b/src/Machine/src/Serval.Machine.Shared/Services/FileSystem.cs
new file mode 100644
index 00000000..78a3ceb2
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/FileSystem.cs
@@ -0,0 +1,25 @@
+namespace Serval.Machine.Shared.Services;
+
+public class FileSystem : IFileSystem
+{
+ public void CreateDirectory(string path)
+ {
+ Directory.CreateDirectory(path);
+ }
+
+ public void DeleteFile(string path)
+ {
+ if (File.Exists(path))
+ File.Delete(path);
+ }
+
+ public Stream OpenWrite(string path)
+ {
+ return File.OpenWrite(path);
+ }
+
+ public Stream OpenRead(string path)
+ {
+ return File.OpenRead(path);
+ }
+}
diff --git a/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJob.cs b/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJob.cs
new file mode 100644
index 00000000..26fe58ed
--- /dev/null
+++ b/src/Machine/src/Serval.Machine.Shared/Services/HangfireBuildJob.cs
@@ -0,0 +1,180 @@
+namespace Serval.Machine.Shared.Services;
+
+public abstract class HangfireBuildJob(
+ IPlatformService platformService,
+ IRepository engines,
+ IDistributedReaderWriterLockFactory lockFactory,
+ IDataAccessContext dataAccessContext,
+ IBuildJobService buildJobService,
+ ILogger logger
+) : HangfireBuildJob