From e4d83d01235b9ec3b9e68a6a64caf10894648c57 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 18 Apr 2024 15:58:25 -0400 Subject: [PATCH] Reviewer comments Update from reviewer comments Update test coverage, update to most recent Machine version. Update documentation. --- src/Serval.Client/Client.g.cs | 90 ++++++++---- src/Serval.Shared/Serval.Shared.csproj | 2 +- .../Contracts/PretranslationUsfmTextOrigin.cs | 9 ++ .../TranslationEnginesController.cs | 33 +++-- .../Services/IPretranslationService.cs | 1 + .../Services/PretranslationService.cs | 86 +++++++++-- .../Services/EngineServiceTests.cs | 2 +- .../Services/PretranslationServiceTests.cs | 133 ++++++++++++------ tests/Serval.Translation.Tests/Usings.cs | 1 + 9 files changed, 265 insertions(+), 92 deletions(-) create mode 100644 src/Serval.Translation/Contracts/PretranslationUsfmTextOrigin.cs diff --git a/src/Serval.Client/Client.g.cs b/src/Serval.Client/Client.g.cs index e190a91f..9de2c1bd 100644 --- a/src/Serval.Client/Client.g.cs +++ b/src/Serval.Client/Client.g.cs @@ -1625,14 +1625,21 @@ public partial interface ITranslationEnginesClient ///
segments in the the target book and returned. If the USFM book does not exist in the target corpus, then the ///
pretranslated text will be inserted into an empty template created from the source USFM book and returned. ///
Only pretranslations for the most recent successful build of the engine are returned. - ///
Both scripture and non-scripture text is pretranslated according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) + ///
+ ///
The text that populates the USFM structure can be controlled by the `textOrigin` parameter with these options: + ///
* `PreferExisting`: The existing and pretranslated texts are merged into the USFM, preferring existing text. **This is the default**. + ///
* `PreferPretranslated`: The existing and pretranslated texts are merged into the USFM, preferring pretranslated text. + ///
* `OnlyExisting`: Return the existing target USFM file with no modifications (except updating the USFM id if needed) + ///
* `OnlyPretranslated`: Only the pretranslated text is returned; all existing text in the target USFM is removed + ///
Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) /// /// The translation engine id /// The corpus id /// The text id + /// The source[s] of the data to populate the USFM file with. /// The book in USFM format /// A server side error occurred. - System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// @@ -1648,18 +1655,18 @@ public partial interface ITranslationEnginesClient /// Starts a build job for a translation engine. /// /// - /// Specify the corpora or textIds to pretranslate. Even when a corpus or textId - ///
is selected for pretranslation, only "untranslated" text will be pretranslated: - ///
that is, segments (lines of text) in the specified corpora or textId's that have - ///
untranslated text but no translated text. If a corpus is a Paratext project, - ///
you may flag a subset of books for pretranslation by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) - ///
in the textIds parameter. If the engine does not support pretranslation, these fields have no effect. - ///
- ///
Similarly, specify the corpora and textIds to train on. If no train_on field is provided, all corpora will be used. - ///
Paratext projects can be filtered by book for training and pretranslating. This filtering follows the original versification. - ///
To filter, use the 3 character code for the book of the Bible in the textID while building. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// Specify the corpora and textIds to train on. If no "trainOn" field is provided, all corpora will be used. + ///
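For Paratext projects, you may flag a subset of books for training by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) in the textIds parameter. + ///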
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) - ///
for more details. + ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + ///
+ ///
Specify the corpora or textIds to pretranslate. When a corpus or textId is selected for pretranslation, + ///
the following text will be pretranslated: + ///
* Text segments that are in the source and not the target (untranslated) + ///
* Text segments that are in the source and the target, but where that target segment is not trained on. + ///
If the engine does not support pretranslation, these fields have no effect. + ///
Pretranslating has the same filtering as training. ///
///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. ///
See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. @@ -3586,14 +3593,21 @@ public string BaseUrl ///
segments in the the target book and returned. If the USFM book does not exist in the target corpus, then the ///
pretranslated text will be inserted into an empty template created from the source USFM book and returned. ///
Only pretranslations for the most recent successful build of the engine are returned. - ///
Both scripture and non-scripture text is pretranslated according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) + ///
+ ///
The text that populates the USFM structure can be controlled by the `textOrigin` parameter with these options: + ///
* `PreferExisting`: The existing and pretranslated texts are merged into the USFM, preferring existing text. **This is the default**. + ///
* `PreferPretranslated`: The existing and pretranslated texts are merged into the USFM, preferring pretranslated text. + ///
* `OnlyExisting`: Return the existing target USFM file with no modifications (except updating the USFM id if needed) + ///
* `OnlyPretranslated`: Only the pretranslated text is returned; all existing text in the target USFM is removed + ///
Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) ///
/// The translation engine id /// The corpus id /// The text id + /// The source[s] of the data to populate the USFM file with. /// The book in USFM format /// A server side error occurred. - public virtual async System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); @@ -3623,6 +3637,12 @@ public string BaseUrl urlBuilder_.Append("/pretranslations/"); urlBuilder_.Append(System.Uri.EscapeDataString(ConvertToString(textId, System.Globalization.CultureInfo.InvariantCulture))); urlBuilder_.Append("/usfm"); + urlBuilder_.Append('?'); + if (textOrigin != null) + { + urlBuilder_.Append(System.Uri.EscapeDataString("text-origin")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(textOrigin, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); + } + urlBuilder_.Length--; PrepareRequest(client_, request_, urlBuilder_); @@ -3824,18 +3844,18 @@ public string BaseUrl /// Starts a build job for a translation engine. /// /// - /// Specify the corpora or textIds to pretranslate. Even when a corpus or textId - ///
is selected for pretranslation, only "untranslated" text will be pretranslated: - ///
that is, segments (lines of text) in the specified corpora or textId's that have - ///
untranslated text but no translated text. If a corpus is a Paratext project, - ///
you may flag a subset of books for pretranslation by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) - ///
in the textIds parameter. If the engine does not support pretranslation, these fields have no effect. - ///
- ///
Similarly, specify the corpora and textIds to train on. If no train_on field is provided, all corpora will be used. - ///
Paratext projects can be filtered by book for training and pretranslating. This filtering follows the original versification. - ///
To filter, use the 3 character code for the book of the Bible in the textID while building. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// Specify the corpora and textIds to train on. If no "trainOn" field is provided, all corpora will be used. + ///
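For Paratext projects, you may flag a subset of books for training by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) in the textIds parameter. + ///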
Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. ///
Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) - ///
for more details. + ///
All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + ///
+ ///
Specify the corpora or textIds to pretranslate. When a corpus or textId is selected for pretranslation, + ///
the following text will be pretranslated: + ///
* Text segments that are in the source and not the target (untranslated) + ///
* Text segments that are in the source and the target, but where that target segment is not trained on. + ///
If the engine does not support pretranslation, these fields have no effect. + ///
Pretranslating has the same filtering as training. ///
///
The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. ///
See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. @@ -5965,6 +5985,24 @@ public partial class Pretranslation } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.0.2.0 (NJsonSchema v11.0.0.0 (Newtonsoft.Json v13.0.0.0))")] + public enum PretranslationUsfmTextOrigin + { + + [System.Runtime.Serialization.EnumMember(Value = @"PreferExisting")] + PreferExisting = 0, + + [System.Runtime.Serialization.EnumMember(Value = @"PreferPretranslated")] + PreferPretranslated = 1, + + [System.Runtime.Serialization.EnumMember(Value = @"OnlyExisting")] + OnlyExisting = 2, + + [System.Runtime.Serialization.EnumMember(Value = @"OnlyPretranslated")] + OnlyPretranslated = 3, + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.0.2.0 (NJsonSchema v11.0.0.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TranslationBuild { diff --git a/src/Serval.Shared/Serval.Shared.csproj b/src/Serval.Shared/Serval.Shared.csproj index c653e763..b5bddd8b 100644 --- a/src/Serval.Shared/Serval.Shared.csproj +++ b/src/Serval.Shared/Serval.Shared.csproj @@ -19,7 +19,7 @@ - + diff --git a/src/Serval.Translation/Contracts/PretranslationUsfmTextOrigin.cs b/src/Serval.Translation/Contracts/PretranslationUsfmTextOrigin.cs new file mode 100644 index 00000000..7003f111 --- /dev/null +++ b/src/Serval.Translation/Contracts/PretranslationUsfmTextOrigin.cs @@ -0,0 +1,9 @@ +namespace Serval.Translation.Contracts; + +public enum PretranslationUsfmTextOrigin +{ + PreferExisting, + PreferPretranslated, + OnlyExisting, + OnlyPretranslated +} diff --git a/src/Serval.Translation/Controllers/TranslationEnginesController.cs b/src/Serval.Translation/Controllers/TranslationEnginesController.cs index 4d88cb4e..fa94ec3d 100644 --- a/src/Serval.Translation/Controllers/TranslationEnginesController.cs +++ b/src/Serval.Translation/Controllers/TranslationEnginesController.cs @@ -621,11 +621,18 @@ CancellationToken cancellationToken 
/// segments in the the target book and returned. If the USFM book does not exist in the target corpus, then the /// pretranslated text will be inserted into an empty template created from the source USFM book and returned. /// Only pretranslations for the most recent successful build of the engine are returned. - /// Both scripture and non-scripture text is pretranslated according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) + /// + /// The text that populates the USFM structure can be controlled by the `textOrigin` parameter with these options: + /// * `PreferExisting`: The existing and pretranslated texts are merged into the USFM, preferring existing text. **This is the default**. + /// * `PreferPretranslated`: The existing and pretranslated texts are merged into the USFM, preferring pretranslated text. + /// * `OnlyExisting`: Return the existing target USFM file with no modifications (except updating the USFM id if needed) + /// * `OnlyPretranslated`: Only the pretranslated text is returned; all existing text in the target USFM is removed + /// Both scripture and non-scripture text in the USFM is parsed and grouped according to [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation) ///
/// The translation engine id /// The corpus id /// The text id + /// The source[s] of the data to populate the USFM file with. /// /// The book in USFM format /// The specified book does not exist in the source or target corpus. @@ -650,6 +657,7 @@ public async Task GetPretranslatedUsfmAsync( [NotNull] string id, [NotNull] string corpusId, [NotNull] string textId, + [FromQuery(Name = "text-origin")] PretranslationUsfmTextOrigin? textOrigin, CancellationToken cancellationToken ) { @@ -665,6 +673,7 @@ CancellationToken cancellationToken engine.ModelRevision, corpusId, textId, + textOrigin ?? PretranslationUsfmTextOrigin.PreferExisting, cancellationToken ); if (usfm == "") @@ -762,18 +771,18 @@ CancellationToken cancellationToken /// Starts a build job for a translation engine. /// /// - /// Specify the corpora or textIds to pretranslate. Even when a corpus or textId - /// is selected for pretranslation, only "untranslated" text will be pretranslated: - /// that is, segments (lines of text) in the specified corpora or textId's that have - /// untranslated text but no translated text. If a corpus is a Paratext project, - /// you may flag a subset of books for pretranslation by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) - /// in the textIds parameter. If the engine does not support pretranslation, these fields have no effect. - /// - /// Similarly, specify the corpora and textIds to train on. If no train_on field is provided, all corpora will be used. - /// Paratext projects can be filtered by book for training and pretranslating. This filtering follows the original versification. - /// To filter, use the 3 character code for the book of the Bible in the textID while building. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// Specify the corpora and textIds to train on. If no "trainOn" field is provided, all corpora will be used. 
+ /// For Paratext projects, you may flag a subset of books for training by including their [abbreviations](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) in the textIds parameter. + /// Paratext projects can be filtered by [book](https://github.com/sillsdev/libpalaso/blob/master/SIL.Scripture/Canon.cs) using the textId for training. /// Filters can also be supplied via scriptureRange parameter as ranges of biblical text. See [here](https://github.com/sillsdev/serval/wiki/Filtering-Paratext-Project-Data-with-a-Scripture-Range) - /// for more details. + /// All Paratext project filtering follows original versification. See [here](https://github.com/sillsdev/serval/wiki/Versification-in-Serval) for more information. + /// + /// Specify the corpora or textIds to pretranslate. When a corpus or textId is selected for pretranslation, + /// the following text will be pretranslated: + /// * Text segments that are in the source and not the target (untranslated) + /// * Text segments that are in the source and the target, but where that target segment is not trained on. + /// If the engine does not support pretranslation, these fields have no effect. + /// Pretranslating has the same filtering as training. /// /// The `"options"` parameter of the build config provides the ability to pass build configuration parameters as a JSON object. /// See [nmt job settings documentation](https://github.com/sillsdev/serval/wiki/NMT-Build-Options) about configuring job parameters. 
diff --git a/src/Serval.Translation/Services/IPretranslationService.cs b/src/Serval.Translation/Services/IPretranslationService.cs index 146c945b..dc626553 100644 --- a/src/Serval.Translation/Services/IPretranslationService.cs +++ b/src/Serval.Translation/Services/IPretranslationService.cs @@ -15,6 +15,7 @@ Task GetUsfmAsync( int modelRevision, string corpusId, string textId, + PretranslationUsfmTextOrigin textOrigin, CancellationToken cancellationToken = default ); } diff --git a/src/Serval.Translation/Services/PretranslationService.cs b/src/Serval.Translation/Services/PretranslationService.cs index f264f2c9..306ec531 100644 --- a/src/Serval.Translation/Services/PretranslationService.cs +++ b/src/Serval.Translation/Services/PretranslationService.cs @@ -34,6 +34,7 @@ public async Task GetUsfmAsync( int modelRevision, string corpusId, string textId, + PretranslationUsfmTextOrigin textOrigin, CancellationToken cancellationToken = default ) { @@ -70,20 +71,81 @@ await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken) // Update the target book if it exists string? 
usfm = await _scriptureDataFileService.ReadParatextProjectBookAsync(targetFile.Filename, textId); if (usfm is not null) - return UpdateUsfm(targetSettings, usfm, pretranslations, strictComparison: false); + { + switch (textOrigin) + { + case PretranslationUsfmTextOrigin.PreferExisting: + return UpdateUsfm( + targetSettings, + usfm, + pretranslations, + fullName: targetSettings.FullName, + stripAllText: false, + strictComparison: false, + preferExistingText: true + ); + case PretranslationUsfmTextOrigin.PreferPretranslated: + return UpdateUsfm( + targetSettings, + usfm, + pretranslations, + fullName: targetSettings.FullName, + stripAllText: false, + strictComparison: false, + preferExistingText: false + ); + case PretranslationUsfmTextOrigin.OnlyExisting: + return UpdateUsfm( + targetSettings, + usfm, + pretranslations: [], // don't put any pretranslations, we only want the existing text. + fullName: targetSettings.FullName, + strictComparison: false, + stripAllText: false, + preferExistingText: false + ); + case PretranslationUsfmTextOrigin.OnlyPretranslated: + return UpdateUsfm( + targetSettings, + usfm, + pretranslations, + fullName: targetSettings.FullName, + strictComparison: false, + stripAllText: true, + preferExistingText: false + ); + } + } // Copy and update the source book if it exists usfm = await _scriptureDataFileService.ReadParatextProjectBookAsync(sourceFile.Filename, textId); if (usfm is not null) { - return UpdateUsfm( - sourceSettings, - usfm, - pretranslations, - targetSettings.FullName, - stripAllText: true, - strictComparison: true - ); + switch (textOrigin) + { + case PretranslationUsfmTextOrigin.PreferExisting: + case PretranslationUsfmTextOrigin.PreferPretranslated: + case PretranslationUsfmTextOrigin.OnlyPretranslated: + return UpdateUsfm( + sourceSettings, + usfm, + pretranslations, + fullName: targetSettings.FullName, + stripAllText: true, + strictComparison: true, + preferExistingText: true + ); + case 
PretranslationUsfmTextOrigin.OnlyExisting: + return UpdateUsfm( + sourceSettings, + usfm, + pretranslations: [], // don't pass the pretranslations, we only want the existing text. + fullName: targetSettings.FullName, + stripAllText: true, + strictComparison: true, + preferExistingText: true + ); + } } return ""; @@ -95,14 +157,16 @@ private static string UpdateUsfm( IReadOnlyList<(IReadOnlyList, string)> pretranslations, string? fullName = null, bool stripAllText = false, - bool strictComparison = false + bool strictComparison = false, + bool preferExistingText = true ) { var updater = new UsfmTextUpdater( pretranslations, fullName is null ? null : $"- {fullName}", stripAllText, - strictComparison: strictComparison + strictComparison: strictComparison, + preferExistingText: preferExistingText ); UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification); return updater.GetUsfm(settings.Stylesheet); diff --git a/tests/Serval.Translation.Tests/Services/EngineServiceTests.cs b/tests/Serval.Translation.Tests/Services/EngineServiceTests.cs index 1b0a03c8..7ee6bf75 100644 --- a/tests/Serval.Translation.Tests/Services/EngineServiceTests.cs +++ b/tests/Serval.Translation.Tests/Services/EngineServiceTests.cs @@ -342,7 +342,7 @@ private static TranslationSources[] GetSources(int count, bool isUnknown) { sources[i] = new TranslationSources(); if (!isUnknown) - sources[i].Values.Add(TranslationSource.Primary); + sources[i].Values.Add(V1.TranslationSource.Primary); } return sources; } diff --git a/tests/Serval.Translation.Tests/Services/PretranslationServiceTests.cs b/tests/Serval.Translation.Tests/Services/PretranslationServiceTests.cs index 0bfb81c1..645376f3 100644 --- a/tests/Serval.Translation.Tests/Services/PretranslationServiceTests.cs +++ b/tests/Serval.Translation.Tests/Services/PretranslationServiceTests.cs @@ -4,50 +4,28 @@ public class PretranslationServiceTests { [Test] - public async Task GetUsfmAsync_SourceBook() + 
[TestCase(PretranslationUsfmTextOrigin.PreferPretranslated, "OnlyPretranslated")] + [TestCase(PretranslationUsfmTextOrigin.PreferExisting, "OnlyPretranslated")] + [TestCase(PretranslationUsfmTextOrigin.OnlyPretranslated, "OnlyPretranslated")] + [TestCase(PretranslationUsfmTextOrigin.OnlyExisting, "Blank")] + public async Task GetUsfmAsync_SourceBook(PretranslationUsfmTextOrigin textOrigin, string returnUsfmType) { TestEnvironment env = new(); - string usfm = await env.Service.GetUsfmAsync("engine1", 1, "corpus1", "MAT"); - Assert.That( - usfm.Replace("\r\n", "\n"), - Is.EqualTo( - @"\id MAT - TRG -\h -\c 1 -\p -\v 1 Chapter 1, verse 1. -\v 2 Chapter 1, verse 2. -\c 2 -\p -\v 1 Chapter 2, verse 1. -\v 2 -".Replace("\r\n", "\n") - ) - ); + string usfm = await env.Service.GetUsfmAsync("engine1", 1, "corpus1", "MAT", textOrigin: textOrigin); + Assert.That(usfm.Replace("\r\n", "\n"), Is.EqualTo(TestEnvironment.GetUsfm(returnUsfmType, id: "MAT - TRG"))); } [Test] - public async Task GetUsfmAsync_TargetBook() + [TestCase(PretranslationUsfmTextOrigin.PreferPretranslated, "PreferPretranslated")] + [TestCase(PretranslationUsfmTextOrigin.PreferExisting, "PreferExisting")] + [TestCase(PretranslationUsfmTextOrigin.OnlyPretranslated, "OnlyPretranslated")] + [TestCase(PretranslationUsfmTextOrigin.OnlyExisting, "OnlyExisting")] + public async Task GetUsfmAsync_TargetBook(PretranslationUsfmTextOrigin textOrigin, string returnUsfmType) { TestEnvironment env = new(); env.AddMatthewToTarget(); - string usfm = await env.Service.GetUsfmAsync("engine1", 1, "corpus1", "MAT"); - Assert.That( - usfm.Replace("\r\n", "\n"), - Is.EqualTo( - @"\id MAT - TRG -\h Matthew -\c 1 -\p -\v 1 Chapter 1, verse 1. -\v 2 Chapter 1, verse 2. -\c 2 -\p -\v 1 Chapter 2, verse 1. -\v 2 Chapter two, verse two. 
-".Replace("\r\n", "\n") - ) - ); + string usfm = await env.Service.GetUsfmAsync("engine1", 1, "corpus1", "MAT", textOrigin: textOrigin); + Assert.That(usfm.Replace("\r\n", "\n"), Is.EqualTo(TestEnvironment.GetUsfm(returnUsfmType, id: "MAT - TRG"))); } private class TestEnvironment @@ -136,7 +114,7 @@ public TestEnvironment() ScriptureDataFileService.GetParatextProjectSettings("file2.zip").Returns(CreateProjectSettings("TRG")); ScriptureDataFileService .ReadParatextProjectBookAsync("file1.zip", "MAT") - .Returns(Task.FromResult(CreateUsfm("SRC", "MAT"))); + .Returns(Task.FromResult(CreateExisting(book: "MAT", id: "MAT - SRC"))); ScriptureDataFileService .ReadParatextProjectBookAsync("file2.zip", "MAT") .Returns(Task.FromResult(null)); @@ -152,7 +130,7 @@ public void AddMatthewToTarget() { ScriptureDataFileService .ReadParatextProjectBookAsync("file2.zip", "MAT") - .Returns(Task.FromResult(CreateUsfm("TRG", "MAT"))); + .Returns(Task.FromResult(CreateExisting(book: "MAT", id: "MAT - TRG"))); } private static ParatextProjectSettings CreateProjectSettings(string name) @@ -172,19 +150,92 @@ private static ParatextProjectSettings CreateProjectSettings(string name) ); } - private static string CreateUsfm(string name, string book) + private static string CreateExisting(string book = "MAT", string id = "MAT - TRG") { - return $@"\id {book} - {name} + return $@"\id {id} \h {Canon.BookIdToEnglishName(book)} \c 1 \p \v 1 Chapter one, verse one. -\v 2 Chapter one, verse two. +\v 2 \c 2 \p \v 1 Chapter two, verse one. \v 2 Chapter two, verse two. "; } + + private static string CreatePretranslationsOnly(string id = "MAT - TRG") + { + return $@"\id {id} +\h +\c 1 +\p +\v 1 Chapter 1, verse 1. +\v 2 Chapter 1, verse 2. +\c 2 +\p +\v 1 Chapter 2, verse 1. +\v 2 +"; + } + + private static string CreatePreferPretranslations(string book = "MAT", string id = "MAT - TRG") + { + return $@"\id {id} +\h {Canon.BookIdToEnglishName(book)} +\c 1 +\p +\v 1 Chapter 1, verse 1. 
+\v 2 Chapter 1, verse 2. +\c 2 +\p +\v 1 Chapter 2, verse 1. +\v 2 Chapter two, verse two. +"; + } + + private static string CreatePreferExisting(string book = "MAT", string id = "MAT - TRG") + { + return $@"\id {id} +\h {Canon.BookIdToEnglishName(book)} +\c 1 +\p +\v 1 Chapter one, verse one. +\v 2 Chapter 1, verse 2. +\c 2 +\p +\v 1 Chapter two, verse one. +\v 2 Chapter two, verse two. +"; + } + + private static string CreateBlank(string id = "MAT - TRG") + { + return $@"\id {id} +\h +\c 1 +\p +\v 1 +\v 2 +\c 2 +\p +\v 1 +\v 2 +"; + } + + public static string GetUsfm(string type, string book = "MAT", string id = "MAT - TRG") + { + return type switch + { + "OnlyPretranslated" => CreatePretranslationsOnly(id), + "PreferPretranslated" => CreatePreferPretranslations(book, id), + "PreferExisting" => CreatePreferExisting(book, id), + "OnlyExisting" => CreateExisting(book, id), + "Blank" => CreateBlank(id), + _ => throw new ArgumentOutOfRangeException(nameof(type), type, null) + }; + } } } diff --git a/tests/Serval.Translation.Tests/Usings.cs b/tests/Serval.Translation.Tests/Usings.cs index 7bf4d0eb..ef8a3ff7 100644 --- a/tests/Serval.Translation.Tests/Usings.cs +++ b/tests/Serval.Translation.Tests/Usings.cs @@ -9,6 +9,7 @@ global using Serval.Shared.Configuration; global using Serval.Shared.Services; global using Serval.Shared.Utils; +global using Serval.Translation.Contracts; global using Serval.Translation.Models; global using SIL.DataAccess; global using SIL.Machine.Corpora;