Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Serval API to use IJob and IEngine interfaces #502

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace Serval.Machine.Shared.Models;

public record Corpus
public record TranslationCorpus
{
public required string Id { get; init; }
public required string SourceLanguage { get; init; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Task StartBuildAsync(
string engineId,
string buildId,
string? buildOptions,
IReadOnlyList<Corpus> corpora,
IReadOnlyList<TranslationCorpus> corpora,
CancellationToken cancellationToken = default
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public async Task StartBuildAsync(
string engineId,
string buildId,
string? buildOptions,
IReadOnlyList<Corpus> corpora,
IReadOnlyList<TranslationCorpus> corpora,
CancellationToken cancellationToken = default
)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public Job CreateJob(string engineId, string buildId, BuildStage stage, object?
return stage switch
{
BuildStage.Preprocess
=> CreateJob<NmtPreprocessBuildJob, IReadOnlyList<Corpus>>(
=> CreateJob<NmtPreprocessBuildJob, IReadOnlyList<TranslationCorpus>>(
engineId,
buildId,
"nmt",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace Serval.Machine.Shared.Services;

public class PreprocessBuildJob : HangfireBuildJob<IReadOnlyList<Corpus>>
public class PreprocessBuildJob : HangfireBuildJob<IReadOnlyList<TranslationCorpus>>
{
private static readonly JsonWriterOptions PretranslateWriterOptions = new() { Indented = true };

Expand Down Expand Up @@ -43,7 +43,7 @@ internal int Seed
protected override async Task DoWorkAsync(
string engineId,
string buildId,
IReadOnlyList<Corpus> data,
IReadOnlyList<TranslationCorpus> data,
string? buildOptions,
CancellationToken cancellationToken
)
Expand Down Expand Up @@ -99,7 +99,7 @@ CancellationToken cancellationToken

private async Task<(int TrainCount, int PretranslateCount)> WriteDataFilesAsync(
string buildId,
IReadOnlyList<Corpus> corpora,
IReadOnlyList<TranslationCorpus> corpora,
string? buildOptions,
CancellationToken cancellationToken
)
Expand All @@ -121,7 +121,7 @@ CancellationToken cancellationToken
int trainCount = 0;
int pretranslateCount = 0;
pretranslateWriter.WriteStartArray();
foreach (Corpus corpus in corpora)
foreach (TranslationCorpus corpus in corpora)
{
ITextCorpus[] sourceTextCorpora = _corpusService.CreateTextCorpora(corpus.SourceFiles).ToArray();
ITextCorpus targetTextCorpus =
Expand Down Expand Up @@ -204,7 +204,7 @@ CancellationToken cancellationToken
protected override async Task CleanupAsync(
string engineId,
string buildId,
IReadOnlyList<Corpus> data,
IReadOnlyList<TranslationCorpus> data,
JobCompletionStatus completionStatus
)
{
Expand All @@ -221,12 +221,12 @@ JobCompletionStatus completionStatus
}
}

private static bool IsInTrain(Row row, Corpus corpus)
private static bool IsInTrain(Row row, TranslationCorpus corpus)
{
return IsIncluded(row, corpus.TrainOnTextIds, corpus.TrainOnChapters);
}

private static bool IsInPretranslate(Row row, Corpus corpus)
private static bool IsInPretranslate(Row row, TranslationCorpus corpus)
{
return IsIncluded(row, corpus.PretranslateTextIds, corpus.PretranslateChapters);
}
Expand Down Expand Up @@ -255,7 +255,7 @@ private static bool IsInChapters(IReadOnlyDictionary<string, HashSet<int>> bookC
}

private static IEnumerable<Row?[]> AlignTrainCorpus(
Corpus corpus,
TranslationCorpus corpus,
IReadOnlyList<ITextCorpus> srcCorpora,
ITextCorpus trgCorpus
)
Expand Down Expand Up @@ -379,7 +379,11 @@ ITextCorpus trgCorpus
}
}

private static IEnumerable<Row> AlignPretranslateCorpus(Corpus corpus, ITextCorpus srcCorpus, ITextCorpus trgCorpus)
private static IEnumerable<Row> AlignPretranslateCorpus(
TranslationCorpus corpus,
ITextCorpus srcCorpus,
ITextCorpus trgCorpus
)
{
IEnumerable<string>? textIds = corpus.PretranslateChapters is not null
? corpus.PretranslateChapters.Keys
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ await engineService.TrainSegmentPairAsync(
public override async Task<Empty> StartBuild(StartBuildRequest request, ServerCallContext context)
{
ITranslationEngineService engineService = GetEngineService(request.EngineType);
Models.Corpus[] corpora = request.Corpora.Select(Map).ToArray();
Models.TranslationCorpus[] corpora = request.Corpora.Select(Map).ToArray();
try
{
await engineService.StartBuildAsync(
Expand Down Expand Up @@ -269,7 +269,7 @@ private static Translation.V1.Phrase Map(SIL.Machine.Translation.Phrase source)
};
}

private static Models.Corpus Map(Translation.V1.Corpus source)
private static Models.TranslationCorpus Map(Translation.V1.Corpus source)
{
var pretranslateChapters = source.PretranslateChapters.ToDictionary(
kvp => kvp.Key,
Expand All @@ -283,7 +283,7 @@ private static Models.Corpus Map(Translation.V1.Corpus source)
);
FilterChoice trainingFilter = GetFilterChoice(source.TrainOnAll, trainOnChapters);

return new Models.Corpus
return new Models.TranslationCorpus
{
Id = source.Id,
SourceLanguage = source.SourceLanguage,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ public async Task StartBuildAsync(
string engineId,
string buildId,
string? buildOptions,
IReadOnlyList<Corpus> corpora,
IReadOnlyList<TranslationCorpus> corpora,
CancellationToken cancellationToken = default
)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public Job CreateJob(string engineId, string buildId, BuildStage stage, object?
return stage switch
{
BuildStage.Preprocess
=> CreateJob<SmtTransferPreprocessBuildJob, IReadOnlyList<Corpus>>(
=> CreateJob<SmtTransferPreprocessBuildJob, IReadOnlyList<TranslationCorpus>>(
engineId,
buildId,
"smt_transfer",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ IRepository<TrainSegmentPair> trainSegmentPairs
protected override async Task InitializeAsync(
string engineId,
string buildId,
IReadOnlyList<Corpus> data,
IReadOnlyList<TranslationCorpus> data,
CancellationToken cancellationToken
)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public async Task StartBuildAsync()
using var env = new TestEnvironment();
TranslationEngine engine = env.Engines.Get("engine1");
Assert.That(engine.BuildRevision, Is.EqualTo(1));
await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty<Corpus>());
await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty<TranslationCorpus>());
await env.WaitForBuildToFinishAsync();
engine = env.Engines.Get("engine1");
Assert.Multiple(() =>
Expand All @@ -28,7 +28,7 @@ public async Task CancelBuildAsync_Building()

TranslationEngine engine = env.Engines.Get("engine1");
Assert.That(engine.BuildRevision, Is.EqualTo(1));
await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty<Corpus>());
await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty<TranslationCorpus>());
await env.WaitForBuildToStartAsync();
engine = env.Engines.Get("engine1");
Assert.That(engine.CurrentBuild, Is.Not.Null);
Expand All @@ -55,7 +55,7 @@ public async Task DeleteAsync_WhileBuilding()

TranslationEngine engine = env.Engines.Get("engine1");
Assert.That(engine.BuildRevision, Is.EqualTo(1));
await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty<Corpus>());
await env.Service.StartBuildAsync("engine1", "build1", "{}", Array.Empty<TranslationCorpus>());
await env.WaitForBuildToStartAsync();
engine = env.Engines.Get("engine1");
Assert.That(engine.CurrentBuild, Is.Not.Null);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public class PreprocessBuildJobTests
public async Task RunAsync_FilterOutEverything()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultTextFileCorpus with { };
TranslationCorpus corpus1 = env.DefaultTextFileCorpus with { };

await env.RunBuildJobAsync(corpus1);

Expand All @@ -25,7 +25,7 @@ public async Task RunAsync_FilterOutEverything()
public async Task RunAsync_TrainOnAll()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultTextFileCorpus with { TrainOnTextIds = null };
TranslationCorpus corpus1 = env.DefaultTextFileCorpus with { TrainOnTextIds = null };

await env.RunBuildJobAsync(corpus1);

Expand All @@ -43,7 +43,7 @@ public async Task RunAsync_TrainOnAll()
public async Task RunAsync_TrainOnTextIds()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultTextFileCorpus with { TrainOnTextIds = ["textId1"] };
TranslationCorpus corpus1 = env.DefaultTextFileCorpus with { TrainOnTextIds = ["textId1"] };

await env.RunBuildJobAsync(corpus1);

Expand All @@ -61,7 +61,11 @@ public async Task RunAsync_TrainOnTextIds()
public async Task RunAsync_TrainAndPretranslateAll()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultTextFileCorpus with { PretranslateTextIds = null, TrainOnTextIds = null };
TranslationCorpus corpus1 = env.DefaultTextFileCorpus with
{
PretranslateTextIds = null,
TrainOnTextIds = null
};

await env.RunBuildJobAsync(corpus1);

Expand All @@ -72,7 +76,7 @@ public async Task RunAsync_TrainAndPretranslateAll()
public async Task RunAsync_PretranslateAll()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultTextFileCorpus with { PretranslateTextIds = null };
TranslationCorpus corpus1 = env.DefaultTextFileCorpus with { PretranslateTextIds = null };

await env.RunBuildJobAsync(corpus1);

Expand All @@ -83,7 +87,11 @@ public async Task RunAsync_PretranslateAll()
public async Task RunAsync_PretranslateTextIds()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultTextFileCorpus with { PretranslateTextIds = ["textId1"], TrainOnTextIds = null };
TranslationCorpus corpus1 = env.DefaultTextFileCorpus with
{
PretranslateTextIds = ["textId1"],
TrainOnTextIds = null
};

await env.RunBuildJobAsync(corpus1);

Expand All @@ -94,7 +102,7 @@ public async Task RunAsync_PretranslateTextIds()
public async Task RunAsync_EnableKeyTerms()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultParatextCorpus with { };
TranslationCorpus corpus1 = env.DefaultParatextCorpus with { };

await env.RunBuildJobAsync(corpus1, useKeyTerms: true);

Expand All @@ -112,7 +120,7 @@ public async Task RunAsync_EnableKeyTerms()
public async Task RunAsync_DisableKeyTerms()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultParatextCorpus with { };
TranslationCorpus corpus1 = env.DefaultParatextCorpus with { };

await env.RunBuildJobAsync(corpus1, useKeyTerms: false);

Expand All @@ -130,7 +138,7 @@ public async Task RunAsync_DisableKeyTerms()
public async Task RunAsync_PretranslateChapters()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultParatextCorpus with
TranslationCorpus corpus1 = env.DefaultParatextCorpus with
{
PretranslateChapters = new Dictionary<string, HashSet<int>>
{
Expand All @@ -150,7 +158,7 @@ public async Task RunAsync_PretranslateChapters()
public async Task RunAsync_TrainOnChapters()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultParatextCorpus with
TranslationCorpus corpus1 = env.DefaultParatextCorpus with
{
TrainOnChapters = new Dictionary<string, HashSet<int>>
{
Expand All @@ -177,7 +185,7 @@ public async Task RunAsync_TrainOnChapters()
public async Task RunAsync_MixedSource_Paratext()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultMixedSourceParatextCorpus with
TranslationCorpus corpus1 = env.DefaultMixedSourceParatextCorpus with
{
TrainOnTextIds = null,
PretranslateTextIds = null
Expand All @@ -200,7 +208,7 @@ public async Task RunAsync_MixedSource_Paratext()
public async Task RunAsync_MixedSource_Text()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultMixedSourceTextFileCorpus with
TranslationCorpus corpus1 = env.DefaultMixedSourceTextFileCorpus with
{
TrainOnTextIds = null,
PretranslateTextIds = null,
Expand All @@ -225,7 +233,7 @@ public async Task RunAsync_MixedSource_Text()
public void RunAsync_UnknownLanguageTagsNoData()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultTextFileCorpus with { SourceLanguage = "xxx", TargetLanguage = "zzz" };
TranslationCorpus corpus1 = env.DefaultTextFileCorpus with { SourceLanguage = "xxx", TargetLanguage = "zzz" };

Assert.ThrowsAsync<InvalidOperationException>(async () =>
{
Expand All @@ -237,7 +245,7 @@ public void RunAsync_UnknownLanguageTagsNoData()
public async Task RunAsync_UnknownLanguageTagsNoDataSmtTransfer()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultTextFileCorpus with { SourceLanguage = "xxx", TargetLanguage = "zzz" };
TranslationCorpus corpus1 = env.DefaultTextFileCorpus with { SourceLanguage = "xxx", TargetLanguage = "zzz" };

await env.RunBuildJobAsync(corpus1, engineId: "engine2", engineType: TranslationEngineType.SmtTransfer);
}
Expand All @@ -246,7 +254,7 @@ public async Task RunAsync_UnknownLanguageTagsNoDataSmtTransfer()
public async Task RunAsync_RemoveFreestandingEllipses()
{
using TestEnvironment env = new();
Corpus corpus1 = env.DefaultParatextCorpus with
TranslationCorpus corpus1 = env.DefaultParatextCorpus with
{
TrainOnChapters = new Dictionary<string, HashSet<int>>
{
Expand Down Expand Up @@ -286,7 +294,7 @@ public async Task RunAsync_RemoveFreestandingEllipses()
public void RunAsync_OnlyParseSelectedBooks_NoBadBooks()
{
using TestEnvironment env = new();
Corpus corpus = env.DefaultParatextCorpus with
TranslationCorpus corpus = env.DefaultParatextCorpus with
{
TrainOnTextIds = new() { "LEV" },
PretranslateTextIds = new() { "MRK" }
Expand All @@ -310,7 +318,7 @@ public void RunAsync_OnlyParseSelectedBooks_NoBadBooks()
public void RunAsync_OnlyParseSelectedBooks_TrainOnBadBook()
{
using TestEnvironment env = new();
Corpus corpus = env.DefaultParatextCorpus with
TranslationCorpus corpus = env.DefaultParatextCorpus with
{
TrainOnTextIds = new() { "MAT" },
PretranslateTextIds = new() { "MRK" }
Expand All @@ -333,7 +341,7 @@ public void RunAsync_OnlyParseSelectedBooks_TrainOnBadBook()
public void RunAsync_OnlyParseSelectedBooks_PretranslateOnBadBook()
{
using TestEnvironment env = new();
Corpus corpus = env.DefaultParatextCorpus with
TranslationCorpus corpus = env.DefaultParatextCorpus with
{
TrainOnTextIds = new() { "LEV" },
PretranslateTextIds = new() { "MAT" }
Expand Down Expand Up @@ -375,10 +383,10 @@ private class TestEnvironment : DisposableBase
public IClearMLService ClearMLService { get; }
public IOptionsMonitor<BuildJobOptions> BuildJobOptions { get; }

public Corpus DefaultTextFileCorpus { get; }
public Corpus DefaultMixedSourceTextFileCorpus { get; }
public Corpus DefaultParatextCorpus { get; }
public Corpus DefaultMixedSourceParatextCorpus { get; }
public TranslationCorpus DefaultTextFileCorpus { get; }
public TranslationCorpus DefaultMixedSourceTextFileCorpus { get; }
public TranslationCorpus DefaultParatextCorpus { get; }
public TranslationCorpus DefaultMixedSourceParatextCorpus { get; }

public TestEnvironment()
{
Expand Down Expand Up @@ -614,7 +622,7 @@ public PreprocessBuildJob GetBuildJob(TranslationEngineType engineType)
}

public Task RunBuildJobAsync(
Corpus corpus,
TranslationCorpus corpus,
bool useKeyTerms = true,
string engineId = "engine1",
TranslationEngineType engineType = TranslationEngineType.Nmt
Expand Down
Loading
Loading