-
-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added chapter-level filtering; fixes sillsdev/serval#150
- Loading branch information
Showing
13 changed files
with
472 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
165 changes: 165 additions & 0 deletions
165
src/SIL.Machine.AspNetCore/Services/BiblicalRangeStringParser.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
/// <summary>
/// Parses a textual selection of biblical books and chapters, such as
/// <c>"MAT-ROM;-ACT4-28"</c>, into a map from book ID to selected chapter numbers.
/// </summary>
/// <remarks>
/// In every dictionary produced by this class an EMPTY chapter list means "the whole book".
/// Partial selections are stored as ascending chapter numbers.
/// </remarks>
class BiblicalRangeStringParser {
    // Book IDs are always three characters long (see the validation patterns below).
    private const int BookIdLength = 3;

    // Book numbers covered by the "OT" and "NT" shorthands.
    private const int FirstOldTestamentBook = 1;
    private const int LastOldTestamentBook = 39;
    private const int FirstNewTestamentBook = 40;
    private const int LastNewTestamentBook = 66;

    // Value of versification.GetLastChapter(...) for every book ID known to Canon.
    private readonly Dictionary<string, int> _bookLengths = [];

    // Input shapes accepted when the selection string contains no semicolon.
    private static readonly Regex CommaSeparatedBooks = new Regex(@"^([A-Z\d]{3}|OT|NT)(, ?([A-Z\d]{3}|OT|NT))*$", RegexOptions.Compiled);
    private static readonly Regex BookRange = new Regex(@"^-?[A-Z\d]{3}-[A-Z\d]{3}$", RegexOptions.Compiled);
    private static readonly Regex ChapterSelection = new Regex(@"^-?[A-Z\d]{3} ?(\d+|\d+-\d+)(, ?(\d+|\d+-\d+))*$", RegexOptions.Compiled);

    /// <summary>
    /// Creates a parser whose chapter bounds come from <paramref name="versification"/>.
    /// </summary>
    /// <param name="versification">Versification to read chapter counts from; <c>ScrVers.Original</c> when null.</param>
    public BiblicalRangeStringParser(ScrVers? versification = null) {
        versification ??= ScrVers.Original;
        foreach ((string bookId, int bookNum) in Canon.AllBookIds.Zip(Canon.AllBookNumbers)) {
            _bookLengths[bookId] = versification.GetLastChapter(bookNum);
        }
    }

    /// <summary>
    /// Tells whether <paramref name="section"/> (already trimmed) is a book ID followed by chapter numbers.
    /// </summary>
    /// <remarks>
    /// The decision is made on the text AFTER the three-character book ID rather than on the last
    /// character of the section: the validation patterns allow digits anywhere in a book ID, so an
    /// ID may itself end in a digit and must still be usable as a whole book or as a range end.
    /// </remarks>
    private static bool IsChapterSelection(string section) {
        if (section.Length <= BookIdLength) {
            return false;
        }
        string remainder = section[BookIdLength..].TrimStart();
        return remainder.Length > 0 && char.IsAsciiDigit(remainder[0]);
    }

    /// <summary>Returns the list 1..N where N is the stored last chapter of <paramref name="bookName"/>.</summary>
    private List<int> AllChapters(string bookName) {
        return Enumerable.Range(1, _bookLengths[bookName]).ToList();
    }

    /// <summary>Marks every book numbered firstBook..lastBook (inclusive) as selected in full.</summary>
    private static void AddWholeBooks(Dictionary<string, List<int>> chaptersPerBook, int firstBook, int lastBook) {
        for (int bookNum = firstBook; bookNum <= lastBook; bookNum++) {
            chaptersPerBook[Canon.BookNumberToId(bookNum)] = [];
        }
    }

    /// <summary>
    /// Parses one selection (without any leading '-') into the books/chapters it denotes.
    /// </summary>
    /// <param name="section">One of: "BBB n,n-m,...", "BBB-BBB", "OT", "NT" or "BBB".</param>
    /// <returns>Book ID to chapter list; an empty list means the whole book.</returns>
    /// <exception cref="ArgumentException">The section is empty or names an unknown book, chapter or range.</exception>
    private Dictionary<string, List<int>> ParseSection(string section) {
        section = section.Trim();
        if (section.Length == 0) {
            // Reached for input such as "MAT;" or a lone "-"; report it as bad input
            // instead of letting an unrelated exception escape.
            throw new ArgumentException("A selection cannot be empty.");
        }

        Dictionary<string, List<int>> chaptersPerBook = [];

        //*Specific chapters from one book*
        if (IsChapterSelection(section)) {
            string bookName = section[..BookIdLength];
            if (!_bookLengths.TryGetValue(bookName, out int lastChapter)) {
                throw new ArgumentException($"{bookName} is an invalid book ID.");
            }

            HashSet<int> chapters = [];
            foreach (string chapterRangeString in section[BookIdLength..].Split(',').Select(s => s.Trim())) {
                if (chapterRangeString.Contains('-')) {
                    string[] startAndEnd = chapterRangeString.Split('-');
                    // Exactly "start-end" with 1 <= start < end <= lastChapter.
                    if (startAndEnd.Length != 2
                        || !int.TryParse(startAndEnd[0], out int start)
                        || !int.TryParse(startAndEnd[1], out int end)
                        || start < 1
                        || end > lastChapter
                        || end <= start) {
                        throw new ArgumentException($"{chapterRangeString} is an invalid chapter range.");
                    }
                    for (int chapterNum = start; chapterNum <= end; chapterNum++) {
                        chapters.Add(chapterNum);
                    }
                }
                else {
                    // Chapters are 1-based: reject 0 just like the range form does.
                    if (!int.TryParse(chapterRangeString, out int chapterNum) || chapterNum < 1 || chapterNum > lastChapter) {
                        throw new ArgumentException($"{section} is an invalid chapter number.");
                    }
                    chapters.Add(chapterNum);
                }
            }

            // Every accepted chapter lies in 1..lastChapter, so an equal count means the
            // complete book was listed; normalise that to the "whole book" marker.
            if (chapters.Count == lastChapter) {
                chaptersPerBook[bookName] = [];
            }
            else {
                List<int> sortedChapters = chapters.ToList();
                sortedChapters.Sort();
                chaptersPerBook[bookName] = sortedChapters;
            }
        }
        //*Ranges of books to be added*
        else if (section.Contains('-')) {
            string[] startAndEnd = section.Split('-');
            if (startAndEnd.Length != 2 || !_bookLengths.ContainsKey(startAndEnd[0]) || !_bookLengths.ContainsKey(startAndEnd[1])) {
                throw new ArgumentException($"{section} is an invalid book range.");
            }
            int firstBook = Canon.BookIdToNumber(startAndEnd[0]);
            int lastBook = Canon.BookIdToNumber(startAndEnd[1]);
            if (lastBook <= firstBook) {
                throw new ArgumentException($"{section} is an invalid book range.");
            }
            AddWholeBooks(chaptersPerBook, firstBook, lastBook);
        }
        //*OT*
        else if (section == "OT") {
            AddWholeBooks(chaptersPerBook, FirstOldTestamentBook, LastOldTestamentBook);
        }
        //*NT*
        else if (section == "NT") {
            AddWholeBooks(chaptersPerBook, FirstNewTestamentBook, LastNewTestamentBook);
        }
        //*Whole book*
        else {
            if (!_bookLengths.ContainsKey(section)) {
                throw new ArgumentException($"{section} is an invalid book ID.");
            }
            chaptersPerBook[section] = [];
        }

        return chaptersPerBook;
    }

    /// <summary>
    /// Parses a full selection string into the chapters selected per book.
    /// </summary>
    /// <remarks>
    /// Selections are separated by ';' and applied left to right. A selection starting with '-'
    /// is removed from what has been selected so far; any other selection is added. Without a
    /// semicolon the input must be a comma-separated list of books/"OT"/"NT", a single book
    /// range, or a single chapter selection.
    /// </remarks>
    /// <param name="chapterSelections">The selection string, e.g. <c>"NT;-MAT3-5,17"</c>.</param>
    /// <returns>
    /// Book ID to ascending chapter numbers; an empty list means the whole book. Books whose
    /// chapters were all removed are absent from the result.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// The syntax is invalid, a book/chapter does not exist, or a removal targets something
    /// that is not currently selected.
    /// </exception>
    public Dictionary<string, List<int>> Parse(string chapterSelections) {
        ArgumentNullException.ThrowIfNull(chapterSelections);

        Dictionary<string, List<int>> chaptersPerBook = [];
        chapterSelections = chapterSelections.Trim();

        char delimiter = ';';
        if (!chapterSelections.Contains(';')) {
            if (CommaSeparatedBooks.IsMatch(chapterSelections)) {
                delimiter = ',';
            }
            else if (!BookRange.IsMatch(chapterSelections) && !ChapterSelection.IsMatch(chapterSelections)) {
                throw new ArgumentException("Invalid syntax. If you are providing multiple selections, e.g. a range of books followed by a selection of chapters from a book, separate each selection with a semicolon.");
            }
        }

        foreach (string section in chapterSelections.Split(delimiter).Select(s => s.Trim())) {
            //*Subtraction*
            if (section.StartsWith('-')) {
                foreach ((string bookName, List<int> parsedChapters) in ParseSection(section[1..])) {
                    if (!chaptersPerBook.TryGetValue(bookName, out List<int>? selectedChapters)) {
                        throw new ArgumentException($"{bookName} cannot be removed as it is not in the existing book selection.");
                    }

                    // Expand the "whole book" marker on either side so removal works per chapter.
                    // Locals are used so the dictionary being enumerated is never modified.
                    List<int> chaptersToRemove = parsedChapters.Count == 0 ? AllChapters(bookName) : parsedChapters;
                    if (selectedChapters.Count == 0) {
                        selectedChapters = AllChapters(bookName);
                    }

                    foreach (int chapterNumber in chaptersToRemove) {
                        if (!selectedChapters.Remove(chapterNumber)) {
                            throw new ArgumentException($"{chapterNumber} cannot be removed as it is not in the existing chapter selection.");
                        }
                    }

                    // An empty list would mean "whole book", so a fully removed book is dropped.
                    if (selectedChapters.Count == 0) {
                        chaptersPerBook.Remove(bookName);
                    }
                    else {
                        chaptersPerBook[bookName] = selectedChapters;
                    }
                }
            }
            //*Addition*
            else {
                foreach ((string bookName, List<int> addedChapters) in ParseSection(section)) {
                    if (!chaptersPerBook.TryGetValue(bookName, out List<int>? existingChapters)) {
                        chaptersPerBook[bookName] = addedChapters;
                        continue;
                    }

                    // Whole book on either side absorbs any partial selection.
                    if (existingChapters.Count == 0 || addedChapters.Count == 0) {
                        chaptersPerBook[bookName] = [];
                        continue;
                    }

                    List<int> mergedChapters = existingChapters.Union(addedChapters).ToList();
                    mergedChapters.Sort();
                    if (mergedChapters.Count == _bookLengths[bookName]) {
                        // All chapters are now selected: collapse to the "whole book" marker.
                        mergedChapters = [];
                    }
                    chaptersPerBook[bookName] = mergedChapters;
                }
            }
        }
        return chaptersPerBook;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
tests/SIL.Machine.AspNetCore.Tests/Services/BiblicalRangeStringParserTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
namespace SIL.Machine.AspNetCore.Services; | ||
|
||
[TestFixture]
public class BiblicalRangeStringParserTests {

    // Runs one case: either the parsed selection must be equivalent to the expected
    // dictionary, or parsing must be rejected with an ArgumentException.
    [Test]
    [TestCaseSource(nameof(GetCases))]
    public void TestParse(string rangeString, Dictionary<string, List<int>> expectedOutput, bool throwsException) {
        BiblicalRangeStringParser sut = new();
        if (throwsException) {
            Assert.Throws<ArgumentException>(() => {
                sut.Parse(rangeString);
            });
            return;
        }
        Assert.That(sut.Parse(rangeString), Is.EquivalentTo(expectedOutput));
    }

    // Expected output in which each listed book is selected in full (empty chapter list).
    private static Dictionary<string, List<int>> Books(params string[] bookIds) {
        Dictionary<string, List<int>> selection = new();
        foreach (string bookId in bookIds) {
            selection.Add(bookId, new List<int>());
        }
        return selection;
    }

    // Same as Books, but the books are given by their canon numbers.
    private static Dictionary<string, List<int>> BooksByNumber(IEnumerable<int> bookNumbers) {
        return bookNumbers.ToDictionary(n => Canon.BookNumberToId(n), _ => new List<int>());
    }

    // A case that must parse successfully to the given selection.
    private static TestCaseData Parses(string rangeString, Dictionary<string, List<int>> expected) {
        return new TestCaseData(rangeString, expected, false);
    }

    // A case that must be rejected.
    private static TestCaseData Rejects(string rangeString) {
        return new TestCaseData(rangeString, new Dictionary<string, List<int>>(), true);
    }

    public static IEnumerable<TestCaseData> GetCases() {
        // Whole books, testaments and separators
        yield return Parses("MAL", Books("MAL"));
        yield return Parses("GEN,EXO", Books("GEN", "EXO"));
        yield return Parses("1JN,2JN", Books("1JN", "2JN"));
        yield return Parses("OT", BooksByNumber(Enumerable.Range(1, 39)));
        yield return Parses("NT", BooksByNumber(Enumerable.Range(40, 27)));
        yield return Parses("NT,OT", BooksByNumber(Enumerable.Range(1, 66)));
        yield return Parses("MAT;MRK", Books("MAT", "MRK"));
        yield return Parses("MAT; MRK", Books("MAT", "MRK"));

        // Chapter selections, book ranges and combinations of both
        yield return Parses("MAT1,2,3", new Dictionary<string, List<int>> { ["MAT"] = [1, 2, 3] });
        yield return Parses("MAT1, 2, 3", new Dictionary<string, List<int>> { ["MAT"] = [1, 2, 3] });
        yield return Parses("MAT-LUK", Books("MAT", "MRK", "LUK"));
        yield return Parses("MAT1,2,3;MAT-LUK", Books("MAT", "MRK", "LUK"));
        yield return Parses("2JN-3JN;EXO1,8,3-5;GEN", new Dictionary<string, List<int>> {
            ["GEN"] = [],
            ["EXO"] = [1, 3, 4, 5, 8],
            ["2JN"] = [],
            ["3JN"] = [],
        });
        yield return Parses("1JN 1;1JN 2;1JN 3-5", Books("1JN"));

        // Subtraction
        yield return Parses("MAT-ROM;-ACT4-28", new Dictionary<string, List<int>> {
            ["MAT"] = [],
            ["MRK"] = [],
            ["LUK"] = [],
            ["JHN"] = [],
            ["ACT"] = [1, 2, 3],
            ["ROM"] = [],
        });
        yield return Parses("2JN;-2JN 1", new Dictionary<string, List<int>>());
        yield return Parses("NT;OT;-MRK;-EXO", BooksByNumber(Enumerable.Range(1, 66).Except(new[] { 2, 41 })));

        Dictionary<string, List<int>> newTestamentMinusChapters = BooksByNumber(Enumerable.Range(40, 27));
        newTestamentMinusChapters[Canon.BookNumberToId(40)] = Enumerable.Range(1, 28).Except(new[] { 3, 4, 5, 17 }).ToList();
        newTestamentMinusChapters[Canon.BookNumberToId(66)] = Enumerable.Range(1, 20).ToList();
        yield return Parses("NT;-MAT3-5,17;-REV21,22", newTestamentMinusChapters);

        yield return Parses("MAT-JHN;-MAT-LUK", Books("JHN"));

        //*Throw exceptions
        string[] rejectedInputs = [
            "MAT3-1",
            "MRK-MAT",
            "MRK;-MRK10-3",
            "MAT0-10",
            "MAT-FLUM",
            "-MAT-FLUM",
            "",
            "ABC",
            "MAT-ABC",
            "NT;-ABC-LUK",
            "MAT 500",
            "MAT 1-500",
            "MAT;-MAT 300-500",
            "-MRK",
            "-MRK 1",
            "MRK 2-5;-MRK 1-4",
            "MRK 2-5;-MRK 6",
            "OT;-MRK-LUK",
            "NT;OT;-ABC",
            "MAT;-ABC 1",
            "NT,OT,-MRK,-EXO",
            "OT,MAT1",
            "OT,MAT-LUK",
        ];
        foreach (string rejected in rejectedInputs) {
            yield return Rejects(rejected);
        }
    }

}
Oops, something went wrong.