diff --git a/src/SIL.Machine.AspNetCore/Models/Corpus.cs b/src/SIL.Machine.AspNetCore/Models/Corpus.cs index c33bc52c..bf741e29 100644 --- a/src/SIL.Machine.AspNetCore/Models/Corpus.cs +++ b/src/SIL.Machine.AspNetCore/Models/Corpus.cs @@ -7,6 +7,8 @@ public class Corpus public string TargetLanguage { get; set; } = default!; public bool TrainOnAll { get; set; } public bool PretranslateAll { get; set; } + public Dictionary>? TrainOnChapters { get; set; } + public Dictionary>? PretranslateChapters { get; set; } public HashSet TrainOnTextIds { get; set; } = default!; public HashSet PretranslateTextIds { get; set; } = default!; public List SourceFiles { get; set; } = default!; diff --git a/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs index 4c442377..3b21dda7 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs @@ -1,4 +1,5 @@ -using Google.Protobuf; +using System.Data; +using Google.Protobuf; using MongoDB.Bson.IO; namespace SIL.Machine.AspNetCore.Services; @@ -129,14 +130,34 @@ async IAsyncEnumerable ProcessRowsAsync() foreach (ParallelTextRow row in parallelCorpora.Flatten()) { - if (corpus.TrainOnAll || corpus.TrainOnTextIds.Contains(row.TextId)) + bool isInTrainOnChapters = false; + bool isInPretranslateChapters = false; + if (targetCorpora[CorpusType.Text] is ScriptureTextCorpus stc) + { + bool IsInChapters(Dictionary> bookChapters, object rowRef) + { + if (rowRef is not VerseRef vr) + return false; + return bookChapters.TryGetValue(vr.Book, out HashSet? 
chapters) + && (chapters.Contains(vr.ChapterNum) || chapters.Count == 0); + } + if (corpus.TrainOnChapters is not null) + isInTrainOnChapters = row.Refs.Any(r => IsInChapters(corpus.TrainOnChapters, r)); + if (corpus.PretranslateChapters is not null) + isInPretranslateChapters = row.Refs.Any(r => IsInChapters(corpus.PretranslateChapters, r)); + } + if (corpus.TrainOnAll || corpus.TrainOnTextIds.Contains(row.TextId) || isInTrainOnChapters) { await sourceTrainWriter.WriteAsync($"{row.SourceText}\n"); await targetTrainWriter.WriteAsync($"{row.TargetText}\n"); counts["NumTrainRows"] += 1; } if ( - (corpus.PretranslateAll || corpus.PretranslateTextIds.Contains(row.TextId)) + ( + corpus.PretranslateAll + || corpus.PretranslateTextIds.Contains(row.TextId) + || isInPretranslateChapters + ) && row.SourceSegment.Count > 0 && row.TargetSegment.Count == 0 ) diff --git a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs index 0ea6542a..c1c7fbf5 100644 --- a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs +++ b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs @@ -262,6 +262,12 @@ private static Models.Corpus Map(Serval.Translation.V1.Corpus source) TargetLanguage = source.TargetLanguage, TrainOnAll = source.TrainOnAll, PretranslateAll = source.PretranslateAll, + TrainOnChapters = source + .TrainOnChapters.Select(kvp => (kvp.Key, kvp.Value.Chapters.ToHashSet())) + .ToDictionary(), + PretranslateChapters = source + .PretranslateChapters.Select(kvp => (kvp.Key, kvp.Value.Chapters.ToHashSet())) + .ToDictionary(), TrainOnTextIds = source.TrainOnTextIds.ToHashSet(), PretranslateTextIds = source.PretranslateTextIds.ToHashSet(), SourceFiles = source.SourceFiles.Select(Map).ToList(), diff --git a/src/SIL.Machine/Scripture/ScriptureRangeParser.cs b/src/SIL.Machine/Scripture/ScriptureRangeParser.cs new file mode 100644 index 00000000..e3f78ba3 --- 
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using SIL.Extensions;
using SIL.Scripture;

/// <summary>
/// Parses scripture range selections such as <c>"MAT-LUK;EXO1,8,3-5;-MRK 2"</c> into a map from
/// book ID to the chapter numbers selected in that book.
/// </summary>
/// <remarks>
/// In every dictionary produced by this class an EMPTY chapter list means "the whole book".
/// A selection is one of: a book ID (<c>MAT</c>), <c>OT</c>, <c>NT</c>, a book range
/// (<c>MAT-LUK</c>), or a book ID followed by chapters and chapter ranges (<c>EXO 1,8,3-5</c>).
/// Selections are separated by semicolons (plain book lists may use commas instead) and are applied
/// left to right; a selection prefixed with <c>-</c> is removed from what has been selected so far.
/// </remarks>
public class ScriptureRangeParser
{
    // Canonical book numbers delimiting the Old and New Testaments (GEN..MAL, MAT..REV).
    private const int FirstOldTestamentBookNum = 1;
    private const int LastOldTestamentBookNum = 39;
    private const int FirstNewTestamentBookNum = 40;
    private const int LastNewTestamentBookNum = 66;

    private static readonly Regex CommaSeparatedBooks = new Regex(
        @"^([A-Z\d]{3}|OT|NT)(, ?([A-Z\d]{3}|OT|NT))*$",
        RegexOptions.Compiled
    );
    private static readonly Regex BookRange = new Regex(@"^-?[A-Z\d]{3}-[A-Z\d]{3}$", RegexOptions.Compiled);
    private static readonly Regex ChapterSelection = new Regex(
        @"^-?[A-Z\d]{3} ?(\d+|\d+-\d+)(, ?(\d+|\d+-\d+))*$",
        RegexOptions.Compiled
    );

    // Last chapter number of every known book ID under the chosen versification.
    private readonly Dictionary<string, int> _bookLengths = new Dictionary<string, int>();

    /// <summary>
    /// Convenience wrapper: builds a parser for <paramref name="versification"/> and parses
    /// <paramref name="chapterSelections"/> with it.
    /// </summary>
    public static Dictionary<string, List<int>> GetChapters(string chapterSelections, ScrVers versification = null)
    {
        return new ScriptureRangeParser(versification).GetChapters(chapterSelections);
    }

    /// <summary>
    /// Creates a parser whose chapter counts come from <paramref name="versification"/>
    /// (<see cref="ScrVers.Original"/> when null).
    /// </summary>
    public ScriptureRangeParser(ScrVers versification = null)
    {
        if (versification == null)
            versification = ScrVers.Original;
        foreach ((string bookId, int bookNum) in Canon.AllBookIds.Zip(Canon.AllBookNumbers))
        {
            _bookLengths[bookId] = versification.GetLastChapter(bookNum);
        }
    }

    /// <summary>
    /// Parses one selection (without any leading <c>-</c>) into the books/chapters it denotes.
    /// </summary>
    /// <exception cref="ArgumentException">The selection is empty or not a valid book, range or chapter list.</exception>
    private Dictionary<string, List<int>> ParseSection(string section)
    {
        section = section.Trim();
        if (section.Length == 0)
        {
            // Reached via inputs such as "MAT;" or "-"; fail with the same exception type as all other syntax errors.
            throw new ArgumentException("A selection cannot be empty.");
        }

        var chaptersPerBook = new Dictionary<string, List<int>>();

        // Whole book. Checked first so that book IDs ending in a digit are not mistaken for chapter selections.
        if (_bookLengths.ContainsKey(section))
        {
            chaptersPerBook[section] = new List<int>();
            return chaptersPerBook;
        }
        if (section == "OT")
        {
            AddWholeBooks(chaptersPerBook, FirstOldTestamentBookNum, LastOldTestamentBookNum);
            return chaptersPerBook;
        }
        if (section == "NT")
        {
            AddWholeBooks(chaptersPerBook, FirstNewTestamentBookNum, LastNewTestamentBookNum);
            return chaptersPerBook;
        }

        // Range of books: exactly two known book IDs around a single dash.
        string[] bookParts = section.Split('-');
        if (bookParts.Length == 2 && _bookLengths.ContainsKey(bookParts[0]) && _bookLengths.ContainsKey(bookParts[1]))
        {
            int firstBookNum = Canon.BookIdToNumber(bookParts[0]);
            int lastBookNum = Canon.BookIdToNumber(bookParts[1]);
            if (lastBookNum <= firstBookNum)
            {
                throw new ArgumentException($"{section} is an invalid book range.");
            }
            AddWholeBooks(chaptersPerBook, firstBookNum, lastBookNum);
            return chaptersPerBook;
        }

        // Specific chapters from one book.
        if (char.IsDigit(section[section.Length - 1]))
        {
            if (section.Length < 3)
            {
                throw new ArgumentException($"{section} is an invalid book ID.");
            }
            string bookName = section.Substring(0, 3);
            if (!_bookLengths.TryGetValue(bookName, out int lastChapter))
            {
                throw new ArgumentException($"{bookName} is an invalid book ID.");
            }

            HashSet<int> chapters = ParseChapters(section, section.Substring(3), lastChapter);

            // Selecting every chapter is normalized to the "whole book" representation.
            if (chapters.Count == lastChapter)
            {
                chaptersPerBook[bookName] = new List<int>();
            }
            else
            {
                List<int> sortedChapters = chapters.ToList();
                sortedChapters.Sort();
                chaptersPerBook[bookName] = sortedChapters;
            }
            return chaptersPerBook;
        }

        if (section.IndexOf('-') >= 0)
        {
            throw new ArgumentException($"{section} is an invalid book range.");
        }
        throw new ArgumentException($"{section} is an invalid book ID.");
    }

    /// <summary>
    /// Parses a comma-separated list of chapters and chapter ranges (e.g. <c>"1,8,3-5"</c>) and
    /// validates every chapter against <paramref name="lastChapter"/>.
    /// </summary>
    private static HashSet<int> ParseChapters(string section, string chapterList, int lastChapter)
    {
        var chapters = new HashSet<int>();
        foreach (string chapterRangeString in chapterList.Split(',').Select(s => s.Trim()))
        {
            if (chapterRangeString.IndexOf('-') >= 0)
            {
                string[] startAndEnd = chapterRangeString.Split('-');
                if (
                    startAndEnd.Length != 2 // rejects "1-2-3", whose tail used to be silently ignored
                    || !int.TryParse(startAndEnd[0], out int start)
                    || !int.TryParse(startAndEnd[1], out int end)
                )
                {
                    throw new ArgumentException($"{chapterRangeString} is an invalid chapter range.");
                }
                if (start < 1 || end > lastChapter || end <= start)
                {
                    throw new ArgumentException($"{chapterRangeString} is an invalid chapter range.");
                }
                for (int chapterNum = start; chapterNum <= end; chapterNum++)
                {
                    chapters.Add(chapterNum);
                }
            }
            else
            {
                // Chapters are 1-based: chapter 0 is as invalid as one past the end of the book.
                if (
                    !int.TryParse(chapterRangeString, out int chapterNum)
                    || chapterNum < 1
                    || chapterNum > lastChapter
                )
                {
                    throw new ArgumentException($"{section} is an invalid chapter number.");
                }
                chapters.Add(chapterNum);
            }
        }
        return chapters;
    }

    /// <summary>Adds every book numbered firstBookNum..lastBookNum (inclusive) as a whole-book selection.</summary>
    private static void AddWholeBooks(Dictionary<string, List<int>> chaptersPerBook, int firstBookNum, int lastBookNum)
    {
        for (int bookNum = firstBookNum; bookNum <= lastBookNum; bookNum++)
        {
            chaptersPerBook[Canon.BookNumberToId(bookNum)] = new List<int>();
        }
    }

    /// <summary>Returns the explicit chapter list 1..last for a known book.</summary>
    private List<int> AllChapters(string bookName)
    {
        return Enumerable.Range(1, _bookLengths[bookName]).ToList();
    }

    /// <summary>
    /// Parses a full selection string into a map from book ID to sorted chapter numbers, where an
    /// empty list means the whole book.
    /// </summary>
    /// <param name="chapterSelections">Selections separated by <c>;</c> (or <c>,</c> for a plain list of books).</param>
    /// <returns>The selected chapters per book after applying all additions and subtractions in order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="chapterSelections"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The syntax is invalid, a book or chapter does not exist, or a subtraction removes something
    /// that is not currently selected.
    /// </exception>
    public Dictionary<string, List<int>> GetChapters(string chapterSelections)
    {
        if (chapterSelections == null)
        {
            throw new ArgumentNullException(nameof(chapterSelections));
        }

        var chaptersPerBook = new Dictionary<string, List<int>>();
        chapterSelections = chapterSelections.Trim();

        // Semicolons always delimit selections. Without any, the input must be either a plain
        // comma-separated book list or one single selection (commas then belong to its chapter list).
        char delimiter = ';';
        if (chapterSelections.IndexOf(';') < 0)
        {
            if (CommaSeparatedBooks.IsMatch(chapterSelections))
            {
                delimiter = ',';
            }
            else if (!BookRange.IsMatch(chapterSelections) && !ChapterSelection.IsMatch(chapterSelections))
            {
                throw new ArgumentException(
                    "Invalid syntax. If you are providing multiple selections, e.g. a range of books followed by a selection of chapters from a book, separate each selection with a semicolon."
                );
            }
        }

        foreach (string section in chapterSelections.Split(delimiter).Select(s => s.Trim()))
        {
            //*Subtraction*
            if (section.StartsWith("-", StringComparison.Ordinal))
            {
                foreach (KeyValuePair<string, List<int>> removal in ParseSection(section.Substring(1)))
                {
                    string bookName = removal.Key;
                    if (!chaptersPerBook.TryGetValue(bookName, out List<int> selected))
                    {
                        throw new ArgumentException(
                            $"{bookName} cannot be removed as it is not in the existing book selection."
                        );
                    }

                    // Expand "whole book" on both sides into explicit chapters. The expansion is kept in
                    // locals: writing it back into the dictionary being enumerated throws on older runtimes.
                    List<int> toRemove = removal.Value.Count == 0 ? AllChapters(bookName) : removal.Value;
                    if (selected.Count == 0)
                    {
                        selected = AllChapters(bookName);
                        chaptersPerBook[bookName] = selected;
                    }

                    foreach (int chapterNumber in toRemove)
                    {
                        if (!selected.Remove(chapterNumber))
                        {
                            throw new ArgumentException(
                                $"{chapterNumber} cannot be removed as it is not in the existing chapter selection."
                            );
                        }
                    }

                    // Nothing left of the book: drop it rather than leave an empty list, which would mean "whole book".
                    if (selected.Count == 0)
                    {
                        chaptersPerBook.Remove(bookName);
                    }
                }
            }
            //*Addition*
            else
            {
                foreach (KeyValuePair<string, List<int>> addition in ParseSection(section))
                {
                    string bookName = addition.Key;
                    List<int> added = addition.Value;
                    if (!chaptersPerBook.TryGetValue(bookName, out List<int> existing))
                    {
                        chaptersPerBook[bookName] = added;
                        continue;
                    }
                    if (existing.Count == 0 || added.Count == 0)
                    {
                        // Either side already covers the whole book.
                        chaptersPerBook[bookName] = new List<int>();
                        continue;
                    }

                    List<int> merged = existing.Union(added).OrderBy(chapter => chapter).ToList();
                    chaptersPerBook[bookName] =
                        merged.Count == _bookLengths[bookName] ? new List<int>() : merged;
                }
            }
        }
        return chaptersPerBook;
    }
}
pretranslateBiblicalRangeChapters, + int numLinesWrittenToTrain, + int numEntriesWrittenToPretranslate, + bool throwsException = false + ) + { + using var env = new TestEnvironment(); + var parser = new ScriptureRangeParser(); + + Corpus corpus1 = new Corpus(); + if (throwsException) + { + Assert.Throws(() => + { + corpus1 = new Corpus + { + Id = "corpusId1", + SourceLanguage = "en", + TargetLanguage = "es", + PretranslateAll = false, + TrainOnAll = false, + PretranslateChapters = parser + .GetChapters(pretranslateBiblicalRangeChapters) + .Select(kvp => (kvp.Key, kvp.Value.ToHashSet())) + .ToDictionary(), + TrainOnChapters = parser + .GetChapters(trainOnBiblicalRangeChapters) + .Select(kvp => (kvp.Key, kvp.Value.ToHashSet())) + .ToDictionary(), + PretranslateTextIds = new HashSet(), + TrainOnTextIds = new HashSet(), + SourceFiles = new List + { + new CorpusFile + { + TextId = "textId1", + Format = FileFormat.Paratext, + Location = Path.Combine(Path.GetTempPath(), "Project.zip") + } + }, + TargetFiles = new List + { + new CorpusFile + { + TextId = "textId1", + Format = FileFormat.Paratext, + Location = Path.Combine(Path.GetTempPath(), "Project2.zip") + } + } + }; + }); + return; + } + else + { + corpus1 = new Corpus + { + Id = "corpusId1", + SourceLanguage = "en", + TargetLanguage = "es", + PretranslateAll = false, + TrainOnAll = false, + PretranslateChapters = parser + .GetChapters(pretranslateBiblicalRangeChapters) + .Select(kvp => (kvp.Key, kvp.Value.ToHashSet())) + .ToDictionary(), + TrainOnChapters = parser + .GetChapters(trainOnBiblicalRangeChapters) + .Select(kvp => (kvp.Key, kvp.Value.ToHashSet())) + .ToDictionary(), + PretranslateTextIds = new HashSet(), + TrainOnTextIds = new HashSet(), + SourceFiles = new List + { + new CorpusFile + { + TextId = "textId1", + Format = FileFormat.Paratext, + Location = Path.Combine(Path.GetTempPath(), "Project.zip") + } + }, + TargetFiles = new List + { + new CorpusFile + { + TextId = "textId1", + Format = 
FileFormat.Paratext, + Location = Path.Combine(Path.GetTempPath(), "Project2.zip") + } + } + }; + } + var corpora = new ReadOnlyList(new List { corpus1 }); + await env.BuildJob.RunAsync("engine1", "build1", corpora, "{\"use_key_terms\":false}", default); + using (var stream = await env.SharedFileService.OpenReadAsync("builds/build1/train.src.txt")) + { + using (var reader = new StreamReader(stream)) + { + //Split yields one more segment that there are new lines; thus, the "- 1" + string text = reader.ReadToEnd(); + Assert.That(text.Split("\n").Length - 1, Is.EqualTo(numLinesWrittenToTrain), text); + } + } + using (var stream = await env.SharedFileService.OpenReadAsync("builds/build1/pretranslate.src.json")) + { + using (var reader = new StreamReader(stream)) + { + JsonArray? pretranslationJsonObject = JsonSerializer.Deserialize(reader.ReadToEnd()); + Assert.NotNull(pretranslationJsonObject); + Assert.That( + pretranslationJsonObject!.ToList().Count, + Is.EqualTo(numEntriesWrittenToPretranslate), + JsonSerializer.Serialize(pretranslationJsonObject) + ); + } + } + } + private class TestEnvironment : DisposableBase { public ISharedFileService SharedFileService { get; } diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/41MATTen.SFM b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/41MATTen.SFM new file mode 100644 index 00000000..83a1f679 --- /dev/null +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/41MATTen.SFM @@ -0,0 +1,40 @@ +\id MAT - Test +\h Matthew +\mt Matthew +\ip An introduction to Matthew +\c 1 +\s Chapter One +\v 1 Chapter \pn one\+pro WON\+pro*\pn*, verse one.\f + \fr 1:1: \ft This is a footnote.\f* +\li1 +\v 2 \bd C\bd*hapter one, +\li2 verse\f + \fr 1:2: \ft This is a footnote.\f* two. +\v 3 Chapter one, +\li2 verse three. +\v 4 Chapter one,  +\li2 verse four, +\v 5 Chapter one, +\li2 verse \fig Figure 1|src="image1.png" size="col" ref="1:5"\fig* five. 
+\c 2 +\s1 Chapter Two +\p +\v 1 Chapter \add two\add*, verse \f + \fr 2:1: \ft This is a footnote.\f*one. +\v 2-3 Chapter two, verse \fm ∆\fm*two. +\v 3-4a Chapter two, verse \w three|lemma\w*. +\v 4b Chapter two, verse four. +\p +\v 6 Chapter two, verse \w six|strong="12345" \w*. +\v 6 Bad verse. +\v 5 Chapter two, verse five \rq (MAT 3:1)\rq*. +\v 7a Chapter two, verse seven A, +\s Section header +\p +\v 7b verse seven B. +\p +\v 8 This is a list: +\b +\tr \tc1 +\v 9 Chapter\tcr2 2\tc3 verse\tcr4 9 +\tr \tc1-2 +\v 10 \tc3-4 Chapter 2 verse 10 +\v 11-12 +\restore restore information diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/42MRKTen.SFM b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/42MRKTen.SFM new file mode 100644 index 00000000..46000963 --- /dev/null +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/42MRKTen.SFM @@ -0,0 +1,4 @@ +\id MRK - Test +\h Mark +\mt Mark +\ip An introduction to Mark \ No newline at end of file diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/ProjectBiblicalTerms.xml b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/ProjectBiblicalTerms.xml new file mode 100644 index 00000000..8bdbc4d2 --- /dev/null +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/ProjectBiblicalTerms.xml @@ -0,0 +1,6 @@ + + + PN + Abba + + \ No newline at end of file diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/Settings.xml b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/Settings.xml new file mode 100644 index 00000000..268bde64 --- /dev/null +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/Settings.xml @@ -0,0 +1,34 @@ + + usfm.sty + 4 + en::: + English + 8.0.100.76 + Test2 + 65001 + T + + NFC + Ten + a7e0b3ce0200736062f9f810a444dbfbe64aca35 + Charis SIL + 12 + + + + 41MAT + + Ten.SFM + Project:Ten:ProjectBiblicalTerms.xml + F + F + F + Public + Standard:: + + 3 + 
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + 000000000000000000000000000000000000001100000000000000000000000000000000000000000000000000000000000000000000000000000000000 + + + \ No newline at end of file diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/TermRenderings.xml b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/TermRenderings.xml new file mode 100644 index 00000000..debd73df --- /dev/null +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/TermRenderings.xml @@ -0,0 +1,9 @@ + + + Abba + + + + + + diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/custom.vrs b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/custom.vrs new file mode 100644 index 00000000..9c1cd387 --- /dev/null +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/data/paratext2/custom.vrs @@ -0,0 +1,31 @@ +# custom.vrs + +LEV 14:56 +ROM 14:26 +REV 12:17 +TOB 5:22 +TOB 10:12 +SIR 23:28 +ESG 1:22 +ESG 3:15 +ESG 5:14 +ESG 8:17 +ESG 10:14 +SIR 33:33 +SIR 41:24 +BAR 1:22 +4MA 7:25 +4MA 12:20 + +# deliberately missing verses +-ROM 16:26 +-ROM 16:27 +-3JN 1:15 +-S3Y 1:49 +-ESG 4:6 +-ESG 9:5 +-ESG 9:30 + +LEV 14:55 = LEV 14:55 +LEV 14:55 = LEV 14:56 +LEV 14:56 = LEV 14:57 diff --git a/tests/SIL.Machine.AspNetCore.Tests/Usings.cs b/tests/SIL.Machine.AspNetCore.Tests/Usings.cs index 222a7a74..2a16362c 100644 --- a/tests/SIL.Machine.AspNetCore.Tests/Usings.cs +++ b/tests/SIL.Machine.AspNetCore.Tests/Usings.cs @@ -21,4 +21,5 @@ global using SIL.Machine.Translation; global using SIL.Machine.Utils; global using SIL.ObjectModel; +global using SIL.Scripture; global using SIL.WritingSystems; diff --git a/tests/SIL.Machine.Tests/Scripture/ScriptureRangeParserTests.cs b/tests/SIL.Machine.Tests/Scripture/ScriptureRangeParserTests.cs new file mode 100644 index 00000000..4330f89e --- /dev/null +++ 
using NUnit.Framework;
using SIL.Scripture;

namespace SIL.Machine;

/// <summary>
/// Table-driven tests for <see cref="ScriptureRangeParser.GetChapters(string)"/>.
/// In expected results an empty chapter list stands for "the whole book".
/// </summary>
[TestFixture]
public class ScriptureRangeParserTests
{
    [Test]
    [TestCaseSource(nameof(GetCases))]
    public void TestParse(string rangeString, Dictionary<string, List<int>> expectedOutput, bool throwsException)
    {
        var parser = new ScriptureRangeParser();
        if (throwsException)
        {
            Assert.Throws<ArgumentException>(() =>
            {
                parser.GetChapters(rangeString);
            });
            return;
        }

        Assert.That(parser.GetChapters(rangeString), Is.EquivalentTo(expectedOutput));
    }

    /// <summary>Expected result in which every listed book is selected in full.</summary>
    private static Dictionary<string, List<int>> WholeBooks(params string[] bookIds)
    {
        var expected = new Dictionary<string, List<int>>();
        foreach (string bookId in bookIds)
        {
            expected[bookId] = new List<int>();
        }
        return expected;
    }

    /// <summary>Expected result in which every book numbered in <paramref name="bookNums"/> is selected in full.</summary>
    private static Dictionary<string, List<int>> WholeBooks(IEnumerable<int> bookNums)
    {
        return WholeBooks(bookNums.Select(Canon.BookNumberToId).ToArray());
    }

    /// <summary>A case that must parse successfully to <paramref name="expected"/>.</summary>
    private static TestCaseData Parses(string rangeString, Dictionary<string, List<int>> expected)
    {
        return new TestCaseData(rangeString, expected, false);
    }

    /// <summary>A case that must be rejected with an <see cref="ArgumentException"/>.</summary>
    private static TestCaseData Throws(string rangeString)
    {
        return new TestCaseData(rangeString, new Dictionary<string, List<int>>(), true);
    }

    public static IEnumerable<TestCaseData> GetCases()
    {
        // Whole books and comma/semicolon separated lists of books
        yield return Parses("MAL", WholeBooks("MAL"));
        yield return Parses("GEN,EXO", WholeBooks("GEN", "EXO"));
        yield return Parses("1JN,2JN", WholeBooks("1JN", "2JN"));
        yield return Parses("OT", WholeBooks(Enumerable.Range(1, 39)));
        yield return Parses("NT", WholeBooks(Enumerable.Range(40, 27)));
        yield return Parses("NT,OT", WholeBooks(Enumerable.Range(1, 66)));
        yield return Parses("MAT;MRK", WholeBooks("MAT", "MRK"));
        yield return Parses("MAT; MRK", WholeBooks("MAT", "MRK"));

        // Chapter selections
        yield return Parses(
            "MAT1,2,3",
            new Dictionary<string, List<int>> { { "MAT", new List<int> { 1, 2, 3 } } }
        );
        yield return Parses(
            "MAT1, 2, 3",
            new Dictionary<string, List<int>> { { "MAT", new List<int> { 1, 2, 3 } } }
        );

        // Book ranges and combinations
        yield return Parses("MAT-LUK", WholeBooks("MAT", "MRK", "LUK"));
        yield return Parses("MAT1,2,3;MAT-LUK", WholeBooks("MAT", "MRK", "LUK"));
        yield return Parses(
            "2JN-3JN;EXO1,8,3-5;GEN",
            new Dictionary<string, List<int>>
            {
                { "GEN", new List<int>() },
                { "EXO", new List<int> { 1, 3, 4, 5, 8 } },
                { "2JN", new List<int>() },
                { "3JN", new List<int>() }
            }
        );
        yield return Parses("1JN 1;1JN 2;1JN 3-5", WholeBooks("1JN"));

        // Subtractions
        yield return Parses(
            "MAT-ROM;-ACT4-28",
            new Dictionary<string, List<int>>
            {
                { "MAT", new List<int>() },
                { "MRK", new List<int>() },
                { "LUK", new List<int>() },
                { "JHN", new List<int>() },
                { "ACT", new List<int> { 1, 2, 3 } },
                { "ROM", new List<int>() }
            }
        );
        yield return Parses("2JN;-2JN 1", new Dictionary<string, List<int>>());
        yield return Parses("NT;OT;-MRK;-EXO", WholeBooks(Enumerable.Range(1, 66).Where(i => i != 2 && i != 41)));

        Dictionary<string, List<int>> newTestamentMinusChapters = WholeBooks(Enumerable.Range(40, 27));
        newTestamentMinusChapters[Canon.BookNumberToId(40)] = Enumerable
            .Range(1, 28)
            .Where(c => !(c == 3 || c == 4 || c == 5 || c == 17))
            .ToList();
        newTestamentMinusChapters[Canon.BookNumberToId(66)] = Enumerable.Range(1, 20).ToList();
        yield return Parses("NT;-MAT3-5,17;-REV21,22", newTestamentMinusChapters);

        yield return Parses("MAT-JHN;-MAT-LUK", WholeBooks("JHN"));

        //*Throw exceptions
        yield return Throws("MAT3-1");
        yield return Throws("MRK-MAT");
        yield return Throws("MRK;-MRK10-3");
        yield return Throws("MAT0-10");
        yield return Throws("MAT-FLUM");
        yield return Throws("-MAT-FLUM");
        yield return Throws("");
        yield return Throws("ABC");
        yield return Throws("MAT-ABC");
        yield return Throws("NT;-ABC-LUK");
        yield return Throws("MAT 500");
        yield return Throws("MAT 1-500");
        yield return Throws("MAT;-MAT 300-500");
        yield return Throws("-MRK");
        yield return Throws("-MRK 1");
        yield return Throws("MRK 2-5;-MRK 1-4");
        yield return Throws("MRK 2-5;-MRK 6");
        yield return Throws("OT;-MRK-LUK");
        yield return Throws("NT;OT;-ABC");
        yield return Throws("MAT;-ABC 1");
        yield return Throws("NT,OT,-MRK,-EXO");
        yield return Throws("OT,MAT1");
        yield return Throws("OT,MAT-LUK");
    }
}