Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LT-20351: Fix LT-20351 and implement Novel Root Guesser #245

Merged
merged 30 commits into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
59611e2
Add MaxUnapplications and GuessRoot
jtmaxwell3 Sep 9, 2024
3f7d114
Improve root guesser
jtmaxwell3 Sep 12, 2024
805fc1c
Fix formatting
jtmaxwell3 Sep 12, 2024
81e71ee
Fix formatting problems
jtmaxwell3 Sep 13, 2024
754d823
Fix formatting errors
jtmaxwell3 Sep 13, 2024
5077a02
Fix formatting errors and placement of Guessed
jtmaxwell3 Sep 13, 2024
d081bb7
Fix formatting
jtmaxwell3 Sep 13, 2024
dba5091
Fix format
jtmaxwell3 Sep 13, 2024
783af97
Fix formatting
jtmaxwell3 Sep 13, 2024
516aa7e
Fix formatting
jtmaxwell3 Sep 13, 2024
13fb92c
Change GetShapeNodes to parse lexical patterns
jtmaxwell3 Sep 16, 2024
2a4dba2
Fix formatting
jtmaxwell3 Sep 16, 2024
dbc8ecc
Fix formatting
jtmaxwell3 Sep 16, 2024
fc7ba12
Fix formatting
jtmaxwell3 Sep 16, 2024
3dcad10
Fix bugs in root guesser
jtmaxwell3 Sep 17, 2024
e7bd762
Avoid duplicate matches
jtmaxwell3 Sep 18, 2024
888a819
Fix formatting
jtmaxwell3 Sep 18, 2024
ad87375
Fix formatting
jtmaxwell3 Sep 18, 2024
3e6b25c
Fixes issues raised by Damien
jtmaxwell3 Sep 20, 2024
a7ee22d
Fix formatting
jtmaxwell3 Sep 20, 2024
6918e8b
Fix formatting
jtmaxwell3 Sep 20, 2024
d78b12f
Fix formatting
jtmaxwell3 Sep 20, 2024
c343fbc
Address Damien's concerns
jtmaxwell3 Sep 27, 2024
95da69e
Fix formatting
jtmaxwell3 Sep 27, 2024
ea9afd8
Fix formatting
jtmaxwell3 Sep 27, 2024
eaf3b42
Fix formatting
jtmaxwell3 Sep 27, 2024
aef043e
Fix formatting
jtmaxwell3 Sep 27, 2024
6e6edd7
Fix formatting
jtmaxwell3 Sep 27, 2024
9e5ea60
Replace Iterative with IsIterative and SetIterative
jtmaxwell3 Sep 30, 2024
2f824de
Fix formatting
jtmaxwell3 Sep 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/SIL.Machine.Morphology.HermitCrab/Allomorph.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ public IDictionary<string, object> Properties
get { return _properties; }
}

/// <summary>
/// Gets or sets a value indicating whether this allomorph was guessed by a lexical pattern.
/// </summary>
public bool Guessed { get; set; }

public bool FreeFluctuatesWith(Allomorph other)
{
if (this == other)
Expand Down
2 changes: 2 additions & 0 deletions src/SIL.Machine.Morphology.HermitCrab/AnalysisStratumRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ public IEnumerable<Word> Apply(Word input)
output.Add(mruleOutWord);
if (_morpher.TraceManager.IsTracing)
_morpher.TraceManager.EndUnapplyStratum(_stratum, mruleOutWord);
if (_morpher.MaxUnapplications > 0 && output.Count >= _morpher.MaxUnapplications)
break;
}
return output;
}
Expand Down
108 changes: 98 additions & 10 deletions src/SIL.Machine.Morphology.HermitCrab/CharacterDefinitionTable.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ public class CharacterDefinitionTable : ICollection<CharacterDefinition>
{
private readonly Dictionary<string, CharacterDefinition> _charDefLookup;
private readonly HashSet<CharacterDefinition> _charDefs;
private readonly Dictionary<string, NaturalClass> _naturalClassLookup;

/// <summary>
/// Initializes a new, empty table: no character definitions and no natural classes
/// are registered yet.
/// </summary>
public CharacterDefinitionTable()
{
    // Natural classes registered through AddNaturalClass, keyed by their name.
    _naturalClassLookup = new Dictionary<string, NaturalClass>();

    // The set of character definitions and the string-keyed index over them.
    _charDefs = new HashSet<CharacterDefinition>();
    _charDefLookup = new Dictionary<string, CharacterDefinition>();
}

public string Name { get; set; }
Expand All @@ -43,6 +45,11 @@ public CharacterDefinition AddBoundary(IEnumerable<string> strRep)
return Add(strRep, HCFeatureSystem.Boundary, null);
}

/// <summary>
/// Registers a natural class under its name so that it can be looked up by name later.
/// </summary>
/// <param name="naturalClass">The natural class to register.</param>
/// <remarks>
/// The entry is stored with the dictionary indexer, so registering another class
/// with the same name replaces the earlier entry instead of throwing.
/// </remarks>
public void AddNaturalClass(NaturalClass naturalClass)
{
    string className = naturalClass.Name;
    _naturalClassLookup[className] = naturalClass;
}

/// <summary>
/// Adds the character definition.
/// </summary>
Expand Down Expand Up @@ -98,11 +105,14 @@ public IEnumerable<string> GetMatchingStrReps(ShapeNode node)
}
}

private bool GetShapeNodes(string str, out IEnumerable<ShapeNode> nodes, out int errorPos)
private bool GetShapeNodes(string str, bool allowPattern, out IEnumerable<ShapeNode> nodes, out int errorPos)
{
var nodesList = new List<ShapeNode>();
int i = 0;
string normalized = str.Normalize(NormalizationForm.FormD);
bool optional = false;
int optionalPos = 0;
int optionalCount = 0;
while (i < normalized.Length)
{
bool match = false;
Expand All @@ -120,26 +130,104 @@ private bool GetShapeNodes(string str, out IEnumerable<ShapeNode> nodes, out int
break;
}
}

if (!match)
if (match)
continue;
if (allowPattern)
{
nodes = null;
errorPos = i;
if (!str.IsNormalized(NormalizationForm.FormD))
errorPos = normalized.Substring(0, errorPos).Normalize().Length;
return false;
// Check for pattern language.
// NB: This only happens when the characters don't match.
// I thought about implementing this using Pattern<Shape, ShapeNode>,
// but the Matcher doesn't preserve the unifications of the nodes.
if (normalized[i] == '[')
{
// Example: [Seg].
// Look for a natural class.
int closePos = normalized.IndexOf("]", i);
if (closePos > 0)
{
string className = normalized.Substring(i + 1, closePos - i - 1);
if (_naturalClassLookup.ContainsKey(className))
{
NaturalClass naturalClass = _naturalClassLookup[className];
var node = new ShapeNode(naturalClass.FeatureStruct);
nodesList.Add(node);
i = closePos + 1;
continue;
}
}
}
else if (normalized[i] == '(')
{
if (i + 1 < normalized.Length && normalized[i + 1] == '[')
{
// The natural class that follows is optional.
// Wait for the close parenthesis to process.
optional = true;
optionalPos = i;
optionalCount = nodesList.Count;
i++;
continue;
}
}
else if (normalized[i] == ')')
{
if (optional && nodesList.Count == optionalCount + 1)
{
// Example: ([Seg]).
// Ill-formed: ([C][V]).
// Make the last node optional.
nodesList[nodesList.Count - 1].Annotation.Optional = true;
optional = false;
i++;
continue;
}
}
else if (normalized[i] == '*')
{
if (i > 0 && normalized[i - 1] == ']')
{
// Example: [Seg]*.
// Make the last node Kleene star.
nodesList[nodesList.Count - 1].Annotation.Optional = true;
nodesList[nodesList.Count - 1].SetIterative(true);
i++;
continue;
}
}
// Kleene plus doesn't work because '+' is a boundary marker.
}

// Failure
nodes = null;
errorPos = i;
if (!str.IsNormalized(NormalizationForm.FormD))
errorPos = normalized.Substring(0, errorPos).Normalize().Length;
return false;
}
if (optional)
{
// The open parenthesis didn't get closed.
nodes = null;
errorPos = optionalPos;
if (!str.IsNormalized(NormalizationForm.FormD))
errorPos = normalized.Substring(0, errorPos).Normalize().Length;
return false;
}
nodes = nodesList;
errorPos = -1;
return true;
}

/// <summary>
/// Segments the specified string with the pattern language disabled.
/// </summary>
/// <param name="str">The string to segment.</param>
/// <returns>The result of <c>Segment(str, false)</c>.</returns>
public Shape Segment(string str)
{
    // Convenience overload: plain segmentation never interprets pattern syntax.
    return Segment(str, allowPattern: false);
}

public Shape Segment(string str, bool allowPattern)
{
IEnumerable<ShapeNode> nodes;
int errorPos;
if (GetShapeNodes(str, out nodes, out errorPos))
if (GetShapeNodes(str, allowPattern, out nodes, out errorPos))
{
var shape = new Shape(begin => new ShapeNode(
begin ? HCFeatureSystem.LeftSideAnchor : HCFeatureSystem.RightSideAnchor
Expand All @@ -162,7 +250,7 @@ public int TrySegment(string str, out Shape shape)
{
IEnumerable<ShapeNode> nodes;
int errorPos;
if (GetShapeNodes(str, out nodes, out errorPos))
if (GetShapeNodes(str, true, out nodes, out errorPos))
{
shape = new Shape(begin => new ShapeNode(
begin ? HCFeatureSystem.LeftSideAnchor : HCFeatureSystem.RightSideAnchor
Expand Down
13 changes: 13 additions & 0 deletions src/SIL.Machine.Morphology.HermitCrab/HermitCrabExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,19 @@ internal static void SetDeleted(this ShapeNode node, bool deleted)
);
}

/// <summary>
/// Determines whether the node has been marked as iterative.
/// </summary>
/// <param name="node">The shape node to inspect.</param>
/// <returns>
/// <c>true</c> if the node's annotation carries non-null data; otherwise <c>false</c>.
/// </returns>
internal static bool IsIterative(this ShapeNode node)
{
    // SetIterative stores a value in Annotation.Data when marking a node and
    // clears it to null otherwise, so a non-null value is the iterative marker.
    var marker = node.Annotation.Data;
    return marker != null;
}

/// <summary>
/// Marks or unmarks the node as iterative.
/// </summary>
/// <param name="node">The shape node to update.</param>
/// <param name="iterative">
/// <c>true</c> to store the flag in the node's annotation data;
/// <c>false</c> to reset the annotation data to <c>null</c>.
/// </param>
internal static void SetIterative(this ShapeNode node, bool iterative)
{
    if (!iterative)
    {
        // Clearing the data is what makes IsIterative report false.
        node.Annotation.Data = null;
        return;
    }

    node.Annotation.Data = iterative;
}

private static readonly IEqualityComparer<ShapeNode> NodeComparer = new ProjectionEqualityComparer<
ShapeNode,
FeatureStruct
Expand Down
Loading
Loading