Skip to content

Commit

Permalink
Move to service; address scripture alignment issue
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Oct 18, 2024
1 parent 6ceee18 commit 2768b4c
Show file tree
Hide file tree
Showing 21 changed files with 209 additions and 140 deletions.
22 changes: 11 additions & 11 deletions Serval.sln
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,13 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{C3A14577-A65
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SIL.ServiceToolkit", "src\ServiceToolkit\src\SIL.ServiceToolkit\SIL.ServiceToolkit.csproj", "{0E40F959-C641-40A2-9750-B17A4F9F9E55}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{E41916A7-B9AA-45BE-BCFF-656722FEEA84}"
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{32B63C4B-AECD-4499-ADFB-69EF581B4F4C}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ServiceToolkit", "ServiceToolkit", "{A4DA43D4-29BC-4164-A114-E1775B2C9573}"
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ServiceToolkit", "ServiceToolkit", "{76123A14-29A5-480D-942E-FE00D6474D50}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{5C42D20E-8DFC-4221-BA97-62D9E5742349}"
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SIL.ServiceToolkit.Tests", "src\ServiceToolkit\test\SIL.ServiceToolkit\SIL.ServiceToolkit.Tests.csproj", "{3DC5CD22-3E98-434A-9B00-EBC4DDF797A1}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SIL.ServiceToolkit.Tests", "src\ServiceToolkit\test\SIL.ServiceToolkit.Tests\SIL.ServiceToolkit.Tests.csproj", "{C50ED15A-876D-42BF-980A-388E8C49C78D}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -188,10 +188,10 @@ Global
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.Build.0 = Release|Any CPU
{3DC5CD22-3E98-434A-9B00-EBC4DDF797A1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3DC5CD22-3E98-434A-9B00-EBC4DDF797A1}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3DC5CD22-3E98-434A-9B00-EBC4DDF797A1}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3DC5CD22-3E98-434A-9B00-EBC4DDF797A1}.Release|Any CPU.Build.0 = Release|Any CPU
{C50ED15A-876D-42BF-980A-388E8C49C78D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C50ED15A-876D-42BF-980A-388E8C49C78D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C50ED15A-876D-42BF-980A-388E8C49C78D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C50ED15A-876D-42BF-980A-388E8C49C78D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -227,9 +227,9 @@ Global
{10657805-48F1-4205-B8F5-79447F6EF620} = {25CDB05B-4E24-4A6E-933E-1E0BEC97D74D}
{C3A14577-A654-4604-818C-4E683DD45A51} = {EA69B41C-49EF-4017-A687-44B9DF37FF98}
{0E40F959-C641-40A2-9750-B17A4F9F9E55} = {C3A14577-A654-4604-818C-4E683DD45A51}
{A4DA43D4-29BC-4164-A114-E1775B2C9573} = {E41916A7-B9AA-45BE-BCFF-656722FEEA84}
{5C42D20E-8DFC-4221-BA97-62D9E5742349} = {A4DA43D4-29BC-4164-A114-E1775B2C9573}
{3DC5CD22-3E98-434A-9B00-EBC4DDF797A1} = {5C42D20E-8DFC-4221-BA97-62D9E5742349}
{76123A14-29A5-480D-942E-FE00D6474D50} = {32B63C4B-AECD-4499-ADFB-69EF581B4F4C}
{1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126} = {76123A14-29A5-480D-942E-FE00D6474D50}
{C50ED15A-876D-42BF-980A-388E8C49C78D} = {1DB5E6D1-17A8-4FF2-B90A-C5DFBEF63126}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {9F18C25E-E140-43C3-B177-D562E1628370}
Expand Down
10 changes: 8 additions & 2 deletions src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
namespace EchoTranslationEngine;

public class TranslationEngineServiceV1(BackgroundTaskQueue taskQueue) : TranslationEngineApi.TranslationEngineApiBase
public class TranslationEngineServiceV1(
BackgroundTaskQueue taskQueue,
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
) : TranslationEngineApi.TranslationEngineApiBase
{
private static readonly Empty Empty = new();
private readonly BackgroundTaskQueue _taskQueue = taskQueue;

private readonly IParallelCorpusPreprocessingService _parallelCorpusPreprocessingService =
parallelCorpusPreprocessingService;

public override Task<CreateResponse> Create(CreateRequest request, ServerCallContext context)
{
if (request.SourceLanguage != request.TargetLanguage)
Expand Down Expand Up @@ -76,7 +82,7 @@ await client.BuildStartedAsync(
try
{
List<InsertPretranslationsRequest> pretranslationsRequests = [];
ParallelCorpusPreprocessor.PreprocessCorpora(
_parallelCorpusPreprocessingService.Preprocess(
request.Corpora.Select(Map).ToList(),
row => { },
(row, corpus) =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ public static IMachineBuilder AddBuildJobOptions(this IMachineBuilder builder, I
return builder;
}

/// <summary>
/// Adds the parallel corpus preprocessor registrations to the builder's service collection.
/// </summary>
/// <param name="builder">The machine builder whose <c>Services</c> collection receives the registrations.</param>
/// <returns>The same <paramref name="builder"/> instance, so calls can be chained.</returns>
public static IMachineBuilder AddServiceToolkitServices(this IMachineBuilder builder)
{
    var services = builder.Services;
    services.AddParallelCorpusPreprocessor();
    return builder;
}

public static IMachineBuilder AddThotSmtModel(this IMachineBuilder builder)
{
if (builder.Configuration is null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public static IMachineBuilder AddMachine(this IServiceCollection services, IConf
configuration.GetSection(DistributedReaderWriterLockOptions.Key)
);
builder.AddBuildJobOptions(configuration.GetSection(BuildJobOptions.Key));
builder.AddServiceToolkitServices();
builder.AddMessageOutboxOptions(configuration.GetSection(MessageOutboxOptions.Key));
}
return builder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,18 @@ public class NmtPreprocessBuildJob(
ILogger<NmtPreprocessBuildJob> logger,
IBuildJobService buildJobService,
ISharedFileService sharedFileService,
ILanguageTagService languageTagService
) : PreprocessBuildJob(platformService, engines, dataAccessContext, logger, buildJobService, sharedFileService)
ILanguageTagService languageTagService,
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
)
: PreprocessBuildJob(
platformService,
engines,
dataAccessContext,
logger,
buildJobService,
sharedFileService,
parallelCorpusPreprocessingService
)
{
private readonly ILanguageTagService _languageTagService = languageTagService;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ public class PreprocessBuildJob(
IDataAccessContext dataAccessContext,
ILogger<PreprocessBuildJob> logger,
IBuildJobService buildJobService,
ISharedFileService sharedFileService
ISharedFileService sharedFileService,
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
)
: HangfireBuildJob<IReadOnlyList<ParallelCorpus>>(
platformService,
Expand All @@ -22,7 +23,8 @@ ISharedFileService sharedFileService

private readonly ISharedFileService _sharedFileService = sharedFileService;

public ICorpusService CorpusService { get; set; } = new CorpusService();
private readonly IParallelCorpusPreprocessingService _parallelCorpusPreprocessingService =
parallelCorpusPreprocessingService;

protected override async Task DoWorkAsync(
string engineId,
Expand Down Expand Up @@ -105,7 +107,7 @@ CancellationToken cancellationToken
int trainCount = 0;
int pretranslateCount = 0;
pretranslateWriter.WriteStartArray();
new ParallelCorpusPreprocessor() { CorpusService = CorpusService }.Preprocess(
_parallelCorpusPreprocessingService.Preprocess(
corpora,
row =>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,18 @@ public class SmtTransferPreprocessBuildJob(
IBuildJobService buildJobService,
ISharedFileService sharedFileService,
IDistributedReaderWriterLockFactory lockFactory,
IRepository<TrainSegmentPair> trainSegmentPairs
) : PreprocessBuildJob(platformService, engines, dataAccessContext, logger, buildJobService, sharedFileService)
IRepository<TrainSegmentPair> trainSegmentPairs,
IParallelCorpusPreprocessingService parallelCorpusPreprocessingService
)
: PreprocessBuildJob(
platformService,
engines,
dataAccessContext,
logger,
buildJobService,
sharedFileService,
parallelCorpusPreprocessingService
)
{
private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory;
private readonly IRepository<TrainSegmentPair> _trainSegmentPairs = trainSegmentPairs;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
namespace Serval.Machine.Shared.Services;
using SIL.ServiceToolkit.Utils;

namespace Serval.Machine.Shared.Services;

[TestFixture]
public class NmtEngineServiceTests
Expand Down Expand Up @@ -301,7 +303,8 @@ public override object ActivateJob(Type jobType)
Substitute.For<ILogger<NmtPreprocessBuildJob>>(),
_env.BuildJobService,
_env.SharedFileService,
new LanguageTagService()
new LanguageTagService(),
new ParallelCorpusPreprocessingService(new CorpusService())
);
}
if (jobType == typeof(PostprocessBuildJob))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using SIL.ServiceToolkit.Utils;

namespace Serval.Machine.Shared.Services;

[TestFixture]
Expand Down Expand Up @@ -160,7 +162,11 @@ public async Task RunAsync_PretranslateChapters()

await env.RunBuildJobAsync(corpus1);

Assert.That(await env.GetPretranslateCountAsync(), Is.EqualTo(4));
Assert.That(
await env.GetPretranslateCountAsync(),
Is.EqualTo(4),
JsonSerializer.Serialize(await env.GetPretranslationsAsync())
);
}

[Test]
Expand Down Expand Up @@ -208,7 +214,7 @@ public async Task RunAsync_MixedSource_Paratext()
});
Assert.That(
await env.GetPretranslateCountAsync(),
Is.EqualTo(11),
Is.EqualTo(14),
JsonSerializer.Serialize(await env.GetPretranslationsAsync())
);
}
Expand Down Expand Up @@ -409,6 +415,13 @@ public async Task ParallelCorpusLogic()
new() { }
}
},
PretranslateChapters = new()
{
{
"1CH",
new() { }
}
}
},
},
TargetCorpora = new List<MonolingualCorpus>()
Expand Down Expand Up @@ -455,10 +468,12 @@ public async Task ParallelCorpusLogic()
}
};
await env.RunBuildJobAsync(corpora, useKeyTerms: false);
JsonArray? pretranslations = await env.GetPretranslationsAsync();
Assert.Multiple(async () =>
{
string src = await env.GetSourceExtractAsync();
Assert.That(
await env.GetSourceExtractAsync(),
src,
Is.EqualTo(
@"Source one, chapter fourteen, verse fifty-five. Segment b.
Source one, chapter fourteen, verse fifty-six.
Expand All @@ -471,32 +486,35 @@ await env.GetSourceExtractAsync(),
Source two, chapter one, verse nine. Source two, chapter one, verse ten.
Source two, chapter one, verse one.
"
)
),
src
);
string trg = await env.GetTargetExtractAsync();
Assert.That(
await env.GetTargetExtractAsync(),
trg,
Is.EqualTo(
@"Target two, chapter fourteen, verse fifty-five.
Target two, chapter fourteen, verse fifty-six.
Target one, chapter one, verse one.
Target one, chapter one, verse two.
Target two, chapter one, verse one.
Target two, chapter one, verse two.
Target one, chapter one, verse three.
Target one, chapter one, verse five and six.
Target two, chapter one, verse five and six.
Target one, chapter one, verse seven and eight.
Target one, chapter one, verse nine and ten.
Target two, chapter one, verse nine and ten.
"
)
),
trg
);
Assert.That(pretranslations, Is.Not.Null);
Assert.That(pretranslations!.Count, Is.EqualTo(9), pretranslations.ToJsonString());
Assert.That(
pretranslations[0]!["translation"]!.ToString(),
Is.EqualTo("Source one, chapter twelve, verse one."),
pretranslations.ToJsonString()
);
});
JsonArray? pretranslations = await env.GetPretranslationsAsync();
Assert.That(pretranslations, Is.Not.Null);
Assert.That(pretranslations!.Count, Is.EqualTo(3), pretranslations.ToJsonString());
Assert.That(
pretranslations[2]!["translation"]!.ToString(),
Is.EqualTo("Source one, chapter thirteen, verse one.")
);
}

private class TestEnvironment : DisposableBase
Expand Down Expand Up @@ -802,11 +820,9 @@ public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType)
Substitute.For<ILogger<NmtPreprocessBuildJob>>(),
BuildJobService,
SharedFileService,
new LanguageTagService()
)
{
CorpusService = CorpusService
};
new LanguageTagService(),
new ParallelCorpusPreprocessingService(CorpusService)
);
}
case TranslationEngineType.SmtTransfer:
{
Expand All @@ -818,11 +834,9 @@ public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType)
BuildJobService,
SharedFileService,
LockFactory,
TrainSegmentPairs
)
{
CorpusService = CorpusService
};
TrainSegmentPairs,
new ParallelCorpusPreprocessingService(CorpusService)
);
}
default:
throw new InvalidOperationException("Unknown engine type.");
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
namespace Serval.Machine.Shared.Services;
using SIL.ServiceToolkit.Utils;

namespace Serval.Machine.Shared.Services;

[TestFixture]
public class SmtTransferEngineServiceTests
Expand Down Expand Up @@ -688,7 +690,8 @@ public override object ActivateJob(Type jobType)
_env.BuildJobService,
_env.SharedFileService,
_env._lockFactory,
_env.TrainSegmentPairs
_env.TrainSegmentPairs,
new ParallelCorpusPreprocessingService(new CorpusService())
)
{
TrainJobRunnerType = _env._trainJobRunnerType
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
using SIL.ServiceToolkit.Services;

namespace Microsoft.Extensions.DependencyInjection;
namespace Microsoft.Extensions.DependencyInjection;

public static class IHealthChecksBuilderExtensions
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace Microsoft.Extensions.DependencyInjection;

/// <summary>
/// Extension methods for registering parallel corpus preprocessing services on an
/// <see cref="IServiceCollection"/>.
/// </summary>
public static class IServiceCollectionExtensions
{
    /// <summary>
    /// Registers <see cref="ParallelCorpusPreprocessingService"/> as the singleton
    /// <see cref="IParallelCorpusPreprocessingService"/> and <see cref="CorpusService"/> as the
    /// singleton <see cref="ICorpusService"/>.
    /// </summary>
    /// <param name="services">The service collection to add the registrations to.</param>
    /// <returns>The same <paramref name="services"/> collection, so calls can be chained.</returns>
    public static IServiceCollection AddParallelCorpusPreprocessor(this IServiceCollection services) =>
        // AddSingleton returns the collection it was called on, so the chain yields `services`.
        services
            .AddSingleton<IParallelCorpusPreprocessingService, ParallelCorpusPreprocessingService>()
            .AddSingleton<ICorpusService, CorpusService>();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace SIL.ServiceToolkit.Utils;

/// <summary>
/// Contract for a service that preprocesses a list of parallel corpora and reports rows back to the
/// caller through the supplied callbacks.
/// </summary>
public interface IParallelCorpusPreprocessingService
{
/// <summary>
/// Preprocesses the given corpora, passing rows to the <paramref name="train"/> and
/// <paramref name="pretranslate"/> callbacks.
/// </summary>
/// <param name="corpora">The parallel corpora to preprocess.</param>
/// <param name="train">
/// Callback that receives a <see cref="Row"/>. NOTE(review): presumably invoked for each row selected
/// for training; confirm against the implementation.
/// </param>
/// <param name="pretranslate">
/// Callback that receives a <see cref="Row"/> together with a <see cref="ParallelCorpus"/>.
/// NOTE(review): presumably invoked for each row selected for pretranslation, with the corpus it came
/// from; confirm against the implementation.
/// </param>
/// <param name="useKeyTerms">
/// Optional flag, <c>false</c> by default. NOTE(review): presumably controls whether key terms are
/// included in the training data; confirm against the implementation.
/// </param>
void Preprocess(
IReadOnlyList<ParallelCorpus> corpora,
Action<Row> train,
Action<Row, ParallelCorpus> pretranslate,
bool useKeyTerms = false
);
}
Loading

0 comments on commit 2768b4c

Please sign in to comment.