From 2a6983ff37d18abdecb31a6bb8689e8f46ed549b Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Thu, 3 Oct 2024 13:59:31 -0400 Subject: [PATCH] Add unit smoke test for more complex parallel corpus logic --- .../Services/PreprocessBuildJobTests.cs | 299 +++++++++--------- 1 file changed, 157 insertions(+), 142 deletions(-) diff --git a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs index 6c9ca1e4..0d49be7a 100644 --- a/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs +++ b/src/Machine/test/Serval.Machine.Shared.Tests/Services/PreprocessBuildJobTests.cs @@ -337,147 +337,146 @@ public void RunAsync_OnlyParseSelectedBooks_PretranslateOnBadBook() }); } - // [Test] // TODO - // public async Task ParallelCorpusLogic() - // { - // using TestEnvironment env = new(); - // var corpora = new List() - // { - // new PreprocessBuildJob.ParallelCorpus() - // { - // Id = "1", - // SourceCorpora = new List() - // { - // new() - // { - // Id = "_1", - // Language = "en", - // Files = new List { env.ParatextFile("pt-source1") }, - // TrainOnChapters = new() - // { - // { - // "MAT", - // new() { 1 } - // }, - // { - // "LEV", - // new() { } - // } - // }, - // PretranslateChapters = new() - // { - // { - // "1CH", - // new() { } - // } - // } - // }, - // new() - // { - // Id = "_1", - // Language = "en", - // Files = new List { env.ParatextFile("pt-source2") }, - // TrainOnChapters = new() - // { - // { - // "MAT", - // new() { 1 } - // }, - // { - // "MRK", - // new() { } - // } - // }, - // }, - // }, - // TargetCorpora = new List() - // { - // new() - // { - // Id = "_1", - // Language = "en", - // Files = new List { env.ParatextFile("pt-target1") }, - // TrainOnChapters = new() - // { - // { - // "MAT", - // new() { 1 } - // }, - // { - // "MRK", - // new() { } - // } - // } - // }, - // new() - // { - // Id = "_2", - // Language = "en", - // Files = new List { env.ParatextFile("pt-target2") }, - // TrainOnChapters = new() - // { - // { - // "MAT", - // new() { 1 } - // }, - // { - // "MRK", - // new() { } - // }, - // { - // "LEV", - // new() { } - // } - // } - // } - // } - // } - // }; - // await env.GetBuildJob(TranslationEngineType.SmtTransfer) - // .WriteDataFilesAsync("build1", corpora, "{\"use_key_terms\":false}", default); - // Assert.Multiple(async () => - // { - // Assert.That( - // await env.GetSourceExtractAsync(), - // Is.EqualTo( - // @"Source one, chapter fourteen, verse fifty-five. Segment b. - // Source one, chapter fourteen, verse fifty-six. - // Source one, chapter one, verse one. - // Source two, chapter one, verse two. - // Source two, chapter one, verse three. - // Source two, chapter one, verse four. - // Source two, chapter one, verse five. Source two, chapter one, verse six. - // Source two, chapter one, verse seven. Source two, chapter one, verse eight. - // Source two, chapter one, verse nine. Source two, chapter one, verse ten. - // Source two, chapter one, verse one. - // " - // ) - // ); - // Assert.That( - // await env.GetTargetExtractAsync(), - // Is.EqualTo( - // @"Target two, chapter fourteen, verse fifty-five. - // Target two, chapter fourteen, verse fifty-six. - // Target one, chapter one, verse one. - // Target one, chapter one, verse two. - // Target one, chapter one, verse three. - - // Target one, chapter one, verse five and six. - // Target one, chapter one, verse seven and eight. - // Target one, chapter one, verse nine and ten. - - // " - // ) - // ); - // }); - // JsonArray? pretranslations = await env.GetPretranslationsAsync(); - // Assert.That(pretranslations, Is.Not.Null); - // Assert.That(pretranslations!.Count, Is.EqualTo(7), pretranslations.ToJsonString()); - // Assert.That( - // pretranslations[2]!["translation"]!.ToString(), - // Is.EqualTo("Source one, chapter twelve, verse one.") - // ); - // } + [Test] + public async Task ParallelCorpusLogic() + { + using TestEnvironment env = new(); + var corpora = new List() + { + new ParallelCorpus() + { + Id = "1", + SourceCorpora = new List() + { + new() + { + Id = "_1", + Language = "en", + Files = new List { env.ParatextFile("pt-source1") }, + TrainOnChapters = new() + { + { + "MAT", + new() { 1 } + }, + { + "LEV", + new() { } + } + }, + PretranslateChapters = new() + { + { + "1CH", + new() { } + } + } + }, + new() + { + Id = "_1", + Language = "en", + Files = new List { env.ParatextFile("pt-source2") }, + TrainOnChapters = new() + { + { + "MAT", + new() { 1 } + }, + { + "MRK", + new() { } + } + }, + }, + }, + TargetCorpora = new List() + { + new() + { + Id = "_1", + Language = "en", + Files = new List { env.ParatextFile("pt-target1") }, + TrainOnChapters = new() + { + { + "MAT", + new() { 1 } + }, + { + "MRK", + new() { } + } + } + }, + new() + { + Id = "_2", + Language = "en", + Files = new List { env.ParatextFile("pt-target2") }, + TrainOnChapters = new() + { + { + "MAT", + new() { 1 } + }, + { + "MRK", + new() { } + }, + { + "LEV", + new() { } + } + } + } + } + } + }; + await env.RunBuildJobAsync(corpora, useKeyTerms: false); + Assert.Multiple(async () => + { + Assert.That( + await env.GetSourceExtractAsync(), + Is.EqualTo( + @"Source one, chapter fourteen, verse fifty-five. Segment b. +Source one, chapter fourteen, verse fifty-six. +Source one, chapter one, verse one. +Source two, chapter one, verse two. +Source two, chapter one, verse three. +Source two, chapter one, verse four. +Source two, chapter one, verse five. Source two, chapter one, verse six. +Source two, chapter one, verse seven. Source two, chapter one, verse eight. +Source two, chapter one, verse nine. Source two, chapter one, verse ten. +Source two, chapter one, verse one. +" + ) + ); + Assert.That( + await env.GetTargetExtractAsync(), + Is.EqualTo( + @"Target two, chapter fourteen, verse fifty-five. +Target two, chapter fourteen, verse fifty-six. +Target one, chapter one, verse one. +Target one, chapter one, verse two. +Target one, chapter one, verse three. + +Target one, chapter one, verse five and six. +Target one, chapter one, verse seven and eight. +Target one, chapter one, verse nine and ten. + +" + ) + ); + }); + JsonArray? pretranslations = await env.GetPretranslationsAsync(); + Assert.That(pretranslations, Is.Not.Null); + Assert.That(pretranslations!.Count, Is.EqualTo(37), pretranslations.ToJsonString()); + Assert.That( + pretranslations[2]!["translation"]!.ToString(), + Is.EqualTo("Source one, chapter twelve, verse one.") + ); + } private class TestEnvironment : DisposableBase { @@ -937,9 +936,25 @@ public Task RunBuildJobAsync( string engineId = "engine1", TranslationEngineType engineType = TranslationEngineType.Nmt ) + { + return RunBuildJobAsync([corpus], useKeyTerms, engineId, engineType); + } + + public Task RunBuildJobAsync( + IEnumerable corpora, + bool useKeyTerms = true, + string engineId = "engine1", + TranslationEngineType engineType = TranslationEngineType.Nmt + ) { return GetBuildJob(engineType) - .RunAsync(engineId, "build1", [corpus], useKeyTerms ? null : "{\"use_key_terms\":false}", default); + .RunAsync( + engineId, + "build1", + corpora.ToList(), + useKeyTerms ? null : "{\"use_key_terms\":false}", + default + ); } public async Task GetSourceExtractAsync()