diff --git a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs index 09e5ecd7..841c8250 100644 --- a/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs +++ b/src/SIL.LCModel/DomainServices/AnalysisGuessServices.cs @@ -9,13 +9,17 @@ // using System.Collections.Generic; +using System.Configuration; +using System.Diagnostics; using System.Linq; using Icu; using SIL.LCModel.Core.KernelInterfaces; using SIL.LCModel.Core.Text; using SIL.LCModel.Core.WritingSystems; +using SIL.LCModel.DomainImpl; using SIL.LCModel.Infrastructure; + namespace SIL.LCModel.DomainServices { /// ---------------------------------------------------------------------------------------- @@ -32,6 +36,8 @@ public class AnalysisGuessServices public AnalysisGuessServices(LcmCache cache) { Cache = cache; + m_emptyWAG = new EmptyWAG(); + m_nullWAG = new NullWAG(); } /// @@ -55,71 +61,100 @@ public enum OpinionAgent LcmCache Cache { get; set; } - private IDictionary m_analysisApprovalTable; - /// - /// Table that has user opinions about analyses. - /// - IDictionary AnalysisApprovalTable + // PriorityCount provides a count of the number of times an analysis + // appears with the given priority (= human approved, parser approved, etc.). + // It is used to determine which analysis has higher priority. + class PriorityCount + { + public bool lowercased = false; // whether the word form of the analysis was lowercased + public int priority = 0; // the priority of the count + public int count = 0; + } + + // First key of m_guessTable = word form (or analysis). + // Second key of m_guessTable = previous word form (including m_nullWAG for unknown). + // Final value of m_guessTable = default analysis (or gloss). + private IDictionary> m_guessTable; + IDictionary> GuessTable { get { - if (m_analysisApprovalTable == null) - LoadAnalysisApprovalTable(); - return m_analysisApprovalTable; + if (m_guessTable == null) + GuessTable = new Dictionary>(); + return m_guessTable; } - set { m_analysisApprovalTable = value; } + set { m_guessTable = value; } } - private HashSet m_computerApprovedTable; - /// - /// Table for which analyses have been approved by a computer (i.e. for matching words to Entries) - /// - HashSet ComputerApprovedTable + // CaselessGuessTable is like GuessTable, but for uppercase word forms that can have lowercase analyses. + private IDictionary> m_caselessGuessTable; + IDictionary> CaselessGuessTable { get { - if (m_computerApprovedTable == null) - LoadComputerApprovedTable(); - return m_computerApprovedTable; + if (m_caselessGuessTable == null) + CaselessGuessTable = new Dictionary>(); + return m_caselessGuessTable; } - set { m_computerApprovedTable = value; } + set { m_caselessGuessTable = value; } } - private HashSet m_parserApprovedTable; + private readonly IAnalysis m_emptyWAG; // Represents an empty word form. + private readonly IAnalysis m_nullWAG; // Represents an unknown word form. + /// - /// Table for which analyses have been approved by grammatical parser + /// an empty object for a WAG modelled after NullWAG + /// EmptyWAG represents the previous word form of the first word of a sentence. /// - HashSet ParserApprovedTable + public class EmptyWAG : NullCmObject, IAnalysis { - get + #region IAnalysis Members + + /// + /// + /// + public IWfiWordform Wordform { - if (m_parserApprovedTable == null) - LoadParserApprovedTable(); - return m_parserApprovedTable; + get { return null; } } - set { m_parserApprovedTable = value; } - } - private IDictionary m_guessTable; - IDictionary GuessTable - { - get + /// + /// + /// + public bool HasWordform { - if (m_guessTable == null) - LoadGuessTable(); - return m_guessTable; + get { return false; } } - set { m_guessTable = value; } + + /// + /// + /// + public IWfiAnalysis Analysis + { + get { return null; } + } + + /// + /// + /// + /// + /// + public ITsString GetForm(int ws) + { + return null; + } + + #endregion } /// /// Informs the guess service that the indicated occurrence is being replaced with the specified new - /// analysis. If necessary clear the GuessTable. If possible update it. The most common and - /// performance-critical case is confirming a guess. Return true if the cache was changed. + /// analysis. If necessary clear the GuessTable. If possible update it. + /// Return true if the cache was changed. /// public bool UpdatingOccurrence(IAnalysis oldAnalysis, IAnalysis newAnalysis) { - if (m_guessTable == null) + if (m_guessTable == null && m_caselessGuessTable == null) return false; // already cleared, forget it. if (oldAnalysis == newAnalysis) return false; // nothing changed, no problem. @@ -134,44 +169,35 @@ public bool UpdatingOccurrence(IAnalysis oldAnalysis, IAnalysis newAnalysis) } if (newAnalysis is IWfiWordform || newAnalysis.Wordform == null) return false; // unlikely but doesn't mess up our guesses. - var result = false; - // if the new analysis is NOT the guess for one of its owners, one more occurrence might - // make it the guess, so we need to regenerate. - IAnalysis currentDefault; - if (!m_guessTable.TryGetValue(newAnalysis.Wordform, out currentDefault)) - { - // We have no guess for this wordform: the new analysis becomes it. - m_guessTable[newAnalysis.Wordform] = newAnalysis; - result = true; // we didn't clear the cache but did change it. - } - else if (currentDefault != newAnalysis) + if (newAnalysis.Wordform != oldAnalysis) { - // Some other analysis just became more common...maybe now the default? + // The wordform changed, probably because a lowercase analysis was used. + // This changes the previous word form of the next word, which is unknown to us. ClearGuessData(); return true; } - if (newAnalysis is IWfiAnalysis) - return result; - if (!m_guessTable.TryGetValue(newAnalysis.Analysis, out currentDefault)) + var result = false; + // Remove the word form from the guess tables. + if (GuessTable.ContainsKey(oldAnalysis)) { - // We have no guess for this analysis: the new analysis becomes it. - m_guessTable[newAnalysis.Analysis] = newAnalysis; - result = true; // we didn't clear the cache but did change it. + GuessTable.Remove(oldAnalysis); + result = true; } - else if (currentDefault != newAnalysis) + if (CaselessGuessTable.ContainsKey(oldAnalysis)) { - // Some other analysis just became more common...maybe now the default? - ClearGuessData(); - return true; + CaselessGuessTable.Remove(oldAnalysis); + result = true; } - // We haven't messed up any guesses so the guess table can survive. - return result; // but we may have filled in some guesses. + return result; } bool IsNotDisapproved(IWfiAnalysis wa) { - ICmAgentEvaluation cae; - if (AnalysisApprovalTable.TryGetValue(wa, out cae)) + ICmAgentEvaluation cae = null; + foreach (var ae in wa.EvaluationsRC) + if (((ICmAgent)ae.Owner).Human) + cae = ae; + if (cae != null) return cae.Approves; return true; // no opinion } @@ -192,6 +218,7 @@ public OpinionAgent GetOpinionAgent(IWfiAnalysis wa) return OpinionAgent.Human; } + /// /// /// @@ -199,12 +226,31 @@ public OpinionAgent GetOpinionAgent(IWfiAnalysis wa) /// public bool IsHumanApproved(IWfiAnalysis wa) { - ICmAgentEvaluation cae; - if (AnalysisApprovalTable.TryGetValue(wa, out cae)) + ICmAgentEvaluation cae = null; + foreach (var ae in wa.EvaluationsRC) + if (((ICmAgent)ae.Owner).Human) + cae = ae; + if (cae != null) return cae.Approves; return false; // no opinion } + /// + /// + /// + /// + /// + public bool IsHumanDisapproved(IWfiAnalysis wa) + { + ICmAgentEvaluation cae = null; + foreach (var ae in wa.EvaluationsRC) + if (((ICmAgent)ae.Owner).Human) + cae = ae; + if (cae != null) + return !cae.Approves; + return false; // no opinion + } + /// /// /// @@ -212,7 +258,8 @@ public bool IsHumanApproved(IWfiAnalysis wa) /// public bool IsComputerApproved(IWfiAnalysis candidate) { - return ComputerApprovedTable.Contains(candidate); + var agent = Cache.LangProject.DefaultComputerAgent; + return candidate.GetAgentOpinion(agent) == Opinions.approves; } /// @@ -222,57 +269,517 @@ public bool IsComputerApproved(IWfiAnalysis candidate) /// public bool IsParserApproved(IWfiAnalysis candidate) { - return ParserApprovedTable.Contains(candidate); + var agent = Cache.LangProject.DefaultParserAgent; + return candidate.GetAgentOpinion(agent) == Opinions.approves; + } + + /// + /// + /// + /// + /// + public bool IsParserDisapproved(IWfiAnalysis candidate) + { + var agent = Cache.LangProject.DefaultParserAgent; + return candidate.GetAgentOpinion(agent) == Opinions.disapproves; + } + + /// + /// Try to get the default analysis for form in the context of its previous word form. + /// If form is an analysis,then the result is a gloss. + /// If form is a wordform, then try to get the default gloss of the default analysis if it exists. + /// Use m_emptyWAG as the previous word form for the first analysis in a segment. + /// Use m_nullWAG as the previous word form if the previous word form is unknown. + /// + /// the form that you want an analysis for + /// the lowercase version of form if its analyses should be included + /// the context of the form + /// the resulting analysis + /// bool + private bool TryGetContextAwareGuess(IAnalysis form, IWfiWordform lowercaseForm, IAnalysis previous, out IAnalysis analysis) + { + IDictionary> guessTable = lowercaseForm != null ? CaselessGuessTable : GuessTable; + + if (!guessTable.ContainsKey(form)) + { + // Fill in GuessTable. + guessTable[form] = GetDefaultAnalyses(form, lowercaseForm); + } + if (!guessTable[form].ContainsKey(previous)) + { + // back off to all forms. + previous = m_nullWAG; + if (!guessTable[form].ContainsKey(previous)) + { + // form doesn't occur in the interlinear texts. + analysis = m_nullWAG; + return false; + } + } + analysis = guessTable[form][previous]; + if (analysis == null) + return false; + if (analysis is IWfiAnalysis) + { + // Get the best gloss for analysis. + if (TryGetContextAwareGuess(analysis, null, previous, out IAnalysis gloss)) + { + analysis = gloss; + } + } + return true; + } + + /// + /// Get the default analyses for the given form in the context of the previous word form. + /// If lowercaseForm is given, then include its analyses, too. + /// If form is an analysis,then the default analyses are glosses. + /// Uses m_emptyWAG as previous word form for the first analysis in a segment. + /// Uses m_nullWAG as previous word form when unknown. + /// + /// the form that you want analyses for + /// lowercase version of form + /// Dictionary + private Dictionary GetDefaultAnalyses(IAnalysis form, IWfiWordform lowercaseForm) + { + Dictionary defaults = new Dictionary(); + Dictionary> counts = null; + if (form is IWfiWordform wordform) + { + // Get default analyses. + counts = GetAnalysisCounts(wordform); + if (lowercaseForm != null) + // Add lowercase analyses to counts. + GetAnalysisCounts(lowercaseForm, true, counts); + } + else if (form is IWfiAnalysis analysis) + { + // Get default glosses. + counts = GetGlossCounts(analysis); + } + if (counts != null) + { + // Get the best analysis for each key in counts. + foreach (IAnalysis previous in counts.Keys) + { + IAnalysis best = null; + // Use counts[previous].Keys instead of wordform.AnalysesOC + // because counts[previous].Keys may include lowercase analyses. + foreach (IAnalysis key in counts[previous].Keys) + { + if (best == null || ComparePriorityCounts(key, best, previous, counts) < 0) + { + best = key; + defaults[previous] = best; + } + } + } + } + return defaults; + } + + /// + /// Get analysis counts for the given word form in the context of the previous word form. + /// Uses m_emptyWAG as previous word form for the first analysis in a segment. + /// Uses m_nullWAG as previous word form when unknown. + /// This is used by GetBestGuess for word forms and GetSortedAnalysisGuesses. + /// + /// the form that you want an analysis for + /// Dictionary> + private Dictionary> GetAnalysisCounts(IWfiWordform wordform, bool lowercased = false, + Dictionary> counts = null) + { + if (counts == null) + counts = new Dictionary>(); + var segs = new HashSet(); + foreach (ISegment seg in wordform.OccurrencesInTexts) + { + if (segs.Contains(seg)) continue; + segs.Add(seg); + for (int i = 0; i < seg.AnalysesRS.Count; i++) + { + IAnalysis analysis = seg.AnalysesRS[i]; + if (analysis.Wordform != wordform) continue; + IAnalysis previous = GetPreviousWordform(seg, i); + if (analysis is IWfiGloss) + { + // Get analysis for gloss. + analysis = analysis.Analysis; + } + if (analysis is IWfiAnalysis) + { + // Add high priority count to analysis. + AddAnalysisCount(previous, analysis, 7, lowercased, counts); + } + } + } + // Include analyses that may not have been selected. + foreach (IWfiAnalysis analysis in wordform.AnalysesOC) + { + if (IsNotDisapproved(analysis)) + { + // Human takes priority over parser which takes priority over computer. + // Approved takes priority over disapproved. + // More counts take priority over fewer counts within a priority. + int priority = IsHumanApproved(analysis) ? 6 : IsHumanDisapproved(analysis) ? 1 : + IsParserApproved(analysis) ? 5 : IsParserDisapproved(analysis) ? 2 : + IsComputerApproved(analysis) ? 4 : 3; + AddAnalysisCount(m_nullWAG, analysis, priority, lowercased, counts); + } + } + return counts; + } + + /// + /// Get gloss counts for the given analysis in the context of the previous word form. + /// If form is an analysis,then the analysis counts are for glosses. + /// Uses m_emptyWAG as previous word form for the first analysis in a segment. + /// Uses m_nullWAG as previous word form when unknown. + /// This is used by GetBestGuess for analyses and GetSortedGlossGuesses. + /// + /// the analysis that you want a gloss for + /// Dictionary> + private Dictionary> GetGlossCounts(IWfiAnalysis analysis) + { + var counts = new Dictionary>(); + var segs = new HashSet(); + if (!IsNotDisapproved(analysis)) + return counts; + foreach (ISegment seg in analysis.Wordform.OccurrencesInTexts) + { + if (segs.Contains(seg)) continue; + segs.Add(seg); + for (int i = 0; i < seg.AnalysesRS.Count; i++) + { + // Get gloss for analysis. + IAnalysis previous = GetPreviousWordform(seg, i); + IAnalysis gloss = seg.AnalysesRS[i]; + if (gloss is IWfiGloss) + { + if (gloss.Analysis == analysis) + { + // Add high priority count to gloss. + AddAnalysisCount(previous, gloss, 2, false, counts); + } + } + } + } + // Include glosses that may not have been selected. + foreach (IWfiGloss gloss in analysis.MeaningsOC) + { + AddAnalysisCount(m_nullWAG, gloss, 1, false, counts); + } + return counts; + } + + /// + /// Get the previous word form given a location. + /// + /// the segment of the location + /// the index of the location + /// IAnalysis + private IAnalysis GetPreviousWordform(ISegment seg, int i) + { + if (i == 0) + return m_emptyWAG; + IAnalysis previous = seg.AnalysesRS[i - 1]; + if (previous is IWfiAnalysis || previous is IWfiGloss) + { + previous = previous.Wordform; + } + // Should be IWfiWordform or IPunctuationForm. + return previous; } - void LoadAnalysisApprovalTable() + /// + /// Add a count to counts for analysis with the given previous word form and the given priority. + /// + /// the previous word form + /// the analysis being counted + /// the priority of the count + /// whether the word form of the analysis was lowercased + /// the dictionary of counts being incremented + /// void + private void AddAnalysisCount(IAnalysis previous, IAnalysis analysis, int priority, bool lowercased, + Dictionary> counts) + { + if (previous != m_nullWAG) + { + // Record count for unknown/backoff. + AddAnalysisCount(m_nullWAG, analysis, priority, lowercased, counts); + } + if (!counts.ContainsKey(previous)) + { + counts[previous] = new Dictionary(); + } + if (!counts[previous].ContainsKey(analysis)) + { + counts[previous][analysis] = new PriorityCount(); + } + if (counts[previous][analysis].priority > priority) + { + // Ignore this count because its priority is too low. + return; + } + if (counts[previous][analysis].priority < priority) + { + // Start a new priority count. + counts[previous][analysis].priority = priority; + counts[previous][analysis].lowercased = lowercased; + counts[previous][analysis].count = 0; + } + // Increment count. + counts[previous][analysis].count += 1; + } + + /// + /// Compare the priority counts for a1 and a2 based on + /// the previous wordform and a dictionary of counts. + /// Sort in descending order. + /// + private int ComparePriorityCounts(IAnalysis a1, IAnalysis a2, IAnalysis previous, + Dictionary> counts) { - var dictionary = new Dictionary(); - foreach(var analysis in Cache.ServiceLocator.GetInstance().AllInstances()) - foreach (var ae in analysis.EvaluationsRC) - if (((ICmAgent) ae.Owner).Human) - dictionary[analysis] = ae; - AnalysisApprovalTable = dictionary; + // Check for existence of previous. + if (!counts.ContainsKey(previous)) + { + previous = m_nullWAG; + if (!counts.ContainsKey(previous)) + return 0; + } + // See if we should back off. + if (!counts[previous].ContainsKey(a1) && !counts[previous].ContainsKey(a2)) + previous = m_nullWAG; + // Prefer higher priority counts. + int priority1 = counts[previous].ContainsKey(a1) ? counts[previous][a1].priority : 0; + int priority2 = counts[previous].ContainsKey(a2) ? counts[previous][a2].priority : 0; + if (priority1 < priority2) + return 1; + if (priority1 > priority2) + return -1; + // Prefer higher counts. + int count1 = counts[previous].ContainsKey(a1) ? counts[previous][a1].count : 0; + int count2 = counts[previous].ContainsKey(a2) ? counts[previous][a2].count : 0; + if (count1 < count2) + return 1; + if (count1 > count2) + return -1; + // Prefer analyses that haven't been lowercased. + bool lowercased1 = counts[previous].ContainsKey(a1) && counts[previous][a1].lowercased; + bool lowercased2 = counts[previous].ContainsKey(a2) && counts[previous][a2].lowercased; + if (lowercased1 && !lowercased2) + return 1; + if (lowercased2 && !lowercased1) + return -1; + // Maintain a complete order to avoid non-determinism. + // This means that GetBestGuess and GetSortedAnalyses[0] should have the same analysis. + return a1.Guid.CompareTo(a2.Guid); } - void LoadComputerApprovedTable() + /// + /// Whenever the data we depend upon changes, use this to make sure we load the latest Guess data. + /// + public void ClearGuessData() { - IEnumerable list = GetAgentApprovedList(Cache.LangProject.DefaultComputerAgent); - ComputerApprovedTable = new HashSet(list); + GuessTable = null; + CaselessGuessTable = null; } /// - /// Get all the analyses approved by the specified agent. + /// Given a wordform, provide the best analysis guess for it (using the default vernacular WS). /// - /// + /// /// - private IEnumerable GetAgentApprovedList(ICmAgent agent) + public IAnalysis GetBestGuess(IWfiWordform wf) { - return Cache.ServiceLocator.GetInstance().AllInstances().Where( - analysis => analysis.GetAgentOpinion(agent) == Opinions.approves); + return GetBestGuess(wf, wf.Cache.DefaultVernWs); } - void LoadParserApprovedTable() + /// + /// Given a wf provide the best guess based on the user-approved analyses (in or outside of texts). + /// If we don't already have a guess, this will try to create one from the lexicon, based on the + /// form in the specified WS. + /// + public IAnalysis GetBestGuess(IWfiWordform wf, int ws) { - IEnumerable list = GetAgentApprovedList(Cache.LangProject.DefaultParserAgent); - ParserApprovedTable = new HashSet(list); + if (!EntryGenerated(wf)) + GenerateEntryGuesses(wf, ws); + IAnalysis wag; + if (TryGetContextAwareGuess(wf, null, m_nullWAG, out wag)) + return wag; + return new NullWAG(); } /// - /// NOTE: this only gets analyses and glosses that are referred to by Segment.AnalysesRS + /// Given a wa provide the best guess based on glosses for that analysis (made in or outside of texts). /// + /// /// - IEnumerable GetAllAnalysesAndGlossesOrderedByFrequencyOfUseInText() + public IAnalysis GetBestGuess(IWfiAnalysis wa) + { + IAnalysis wag; + if (TryGetContextAwareGuess(wa, null, m_nullWAG, out wag)) + return wag; + return new NullWAG(); + } + + /// + /// This guess factors in the placement of an occurrence in its segment for making other + /// decisions like matching lowercase alternatives for sentence initial occurrences. + /// + /// + /// True: Do lowercase matching only if the occurrence index is zero. + /// False: Do lowercase matching regardless of the occurrence index. + /// + public IAnalysis GetBestGuess(AnalysisOccurrence occurrence, bool onlyIndexZeroLowercaseMatching = true) { - return from seg in Cache.ServiceLocator.GetInstance().AllInstances() - from analysis in seg.AnalysesRS - where (analysis is IWfiAnalysis || analysis is IWfiGloss) - group analysis by analysis - into countedAnalysis - orderby countedAnalysis.Count() - select countedAnalysis.Key; + // first see if there is a relevant lowercase form of a sentence initial (non-lowercase) wordform + // TODO: make it look for the first word in the sentence...may not be at Index 0! + IWfiWordform lowercaseWf = null; + if (occurrence.Analysis is IWfiWordform wordform) + { + if (!EntryGenerated(wordform)) + GenerateEntryGuesses(wordform, occurrence.BaselineWs); + if (!onlyIndexZeroLowercaseMatching || occurrence.Index == 0) + { + lowercaseWf = GetLowercaseWordform(occurrence); + if (lowercaseWf != null) + { + if (!EntryGenerated(lowercaseWf)) + GenerateEntryGuesses(lowercaseWf, occurrence.BaselineWs); + } + } + } + if (occurrence.BaselineWs == -1) + return new NullWAG(); // happens with empty translation lines + IAnalysis bestGuess; + IAnalysis previous = GetPreviousWordform(occurrence.Segment, occurrence.Index); + if (TryGetContextAwareGuess(occurrence.Analysis, lowercaseWf, previous, out bestGuess)) + return bestGuess; + return new NullWAG(); } + /// + /// Get the lowercase word form if the occurrence is uppercase. + /// + private IWfiWordform GetLowercaseWordform(AnalysisOccurrence occurrence) + { + ITsString tssWfBaseline = occurrence.BaselineText; + var cf = new CaseFunctions(Cache.ServiceLocator.WritingSystemManager.Get(tssWfBaseline.get_WritingSystemAt(0))); + string sLower = cf.ToLower(tssWfBaseline.Text); + // don't bother looking up the lowercased wordform if the instanceOf is already in lowercase form. + if (sLower != tssWfBaseline.Text) + { + ITsString tssLower = TsStringUtils.MakeString(sLower, TsStringUtils.GetWsAtOffset(tssWfBaseline, 0)); + IWfiWordform lowercaseWf; + if (Cache.ServiceLocator.GetInstance().TryGetObject(tssLower, out lowercaseWf)) + return lowercaseWf; + } + return null; + } + + private IAnalysis GetBestGuess(IAnalysis wag, int ws) + { + if (wag is IWfiWordform) + return GetBestGuess(wag.Wordform, ws); + if (wag is IWfiAnalysis) + return GetBestGuess(wag.Analysis); + return new NullWAG(); + } + + /// + /// + /// + public bool TryGetBestGuess(IAnalysis wag, int ws, out IAnalysis bestGuess) + { + bestGuess = GetBestGuess(wag, ws); + return !(bestGuess is NullWAG); + } + + /// + /// + /// + /// + /// True: Do lowercase matching only if the occurrence index is zero. + /// False: Do lowercase matching regardless of the occurrence index. + /// + public bool TryGetBestGuess(AnalysisOccurrence occurrence, out IAnalysis bestGuess, bool onlyIndexZeroLowercaseMatching = true) + { + bestGuess = GetBestGuess(occurrence, onlyIndexZeroLowercaseMatching); + return !(bestGuess is NullWAG); + } + + /// + /// Get possible analyses for the wordform sorted by priority. + /// wordform to get analyses for + /// the location of the wordform + /// + /// True: Do lowercase matching only if the occurrence index is zero. + /// False: Do lowercase matching regardless of the occurrence index. + /// + /// + public List GetSortedAnalysisGuesses(IWfiWordform wordform, AnalysisOccurrence occurrence, bool onlyIndexZeroLowercaseMatching = true) + { + int ws = occurrence != null ? occurrence.BaselineWs : wordform.Cache.DefaultVernWs; + return GetSortedAnalysisGuesses(wordform, ws, occurrence, onlyIndexZeroLowercaseMatching); + } + + /// + /// Get possible analyses for the wordform sorted by priority. + /// wordform to get analyses for + /// the writing system for wordform + /// + public List GetSortedAnalysisGuesses(IWfiWordform wordform, int ws) + { + return GetSortedAnalysisGuesses(wordform, ws, null); + } + + /// + /// Get possible analyses for the wordform sorted by priority. + /// wordform to get analyses for + /// the writing system for wordform + /// the location of wordform + /// + /// True: Do lowercase matching only if the occurrence index is zero. + /// False: Do lowercase matching regardless of the occurrence index. + /// + /// + private List GetSortedAnalysisGuesses(IWfiWordform wordform, int ws, AnalysisOccurrence occurrence, bool onlyIndexZeroLowercaseMatching = true) + { + if (!EntryGenerated(wordform)) + GenerateEntryGuesses(wordform, ws); + + var counts = GetAnalysisCounts(wordform); + List analyses = wordform.AnalysesOC.ToList(); + if (occurrence != null && (!onlyIndexZeroLowercaseMatching || occurrence.Index == 0)) + { + IWfiWordform lowercaseWf = GetLowercaseWordform(occurrence); + if (lowercaseWf != null) + { + // Add lowercase analyses. + if (!EntryGenerated(lowercaseWf)) + GenerateEntryGuesses(lowercaseWf, ws); + GetAnalysisCounts(lowercaseWf, true, counts); + analyses.AddRange(lowercaseWf.AnalysesOC); + } + } + var previous = occurrence == null ? m_nullWAG : GetPreviousWordform(occurrence.Segment, occurrence.Index); + analyses.Sort((x, y) => ComparePriorityCounts(x, y, previous, counts)); + return analyses; + } + + /// + /// Get possible glosses for the analysis sorted by priority. + /// + public List GetSortedGlossGuesses(IWfiAnalysis analysis, AnalysisOccurrence occurrence = null) + { + var counts = GetGlossCounts(analysis); + var previous = occurrence == null ? m_nullWAG : GetPreviousWordform(occurrence.Segment, occurrence.Index); + List glosses = analysis.MeaningsOC.ToList(); + glosses.Sort((x, y) => ComparePriorityCounts(x, y, previous, counts)); + return glosses; + } + #region GenerateEntryGuesses /// /// This class stores the relevant database ids for information which can generate a /// default analysis for a WfiWordform that has no analyses, but whose form exactly @@ -303,7 +810,7 @@ public bool Equals(ITsString x, ITsString y) public int GetHashCode(ITsString obj) { - return(obj.Text ?? "").GetHashCode() ^ obj.get_WritingSystem(0); + return (obj.Text ?? "").GetHashCode() ^ obj.get_WritingSystem(0); } } @@ -379,7 +886,7 @@ private Dictionary MapWordsForComputerGuessesToBestM IPartOfSpeech pos = null; if (sense != null) { - msa = (IMoStemMsa) sense.MorphoSyntaxAnalysisRA; + msa = (IMoStemMsa)sense.MorphoSyntaxAnalysisRA; pos = msa.PartOfSpeechRA; } // map the word to its best entry. @@ -422,6 +929,16 @@ private bool HasAnalysis(IWfiWordform word) return word.AnalysesOC.Count > 0; } + /// + /// Has GenerateEntryGuesses already been called for wordform? + /// + /// + private bool EntryGenerated(IWfiWordform wordform) + { + // NB: This is a hack. It assumes that analyses + // aren't created before GenerateEntryGuesses is called. + return wordform.AnalysesOC.Count > 0; + } /// /// For the given text, find words for which we can generate analyses that match lexical entries. /// @@ -457,254 +974,36 @@ private void GenerateEntryGuesses(IDictionary map) NonUndoableUnitOfWorkHelper.DoUsingNewOrCurrentUowOrSkip(Cache.ActionHandlerAccessor, "Trying to generate guesses during PropChanged when we can't save them.", () => + { + var newAnalysis = waFactory.Create(ww, wgFactory); + newAnalysis.CategoryRA = info.Pos; + // Not all entries have senses. + if (info.Sense != null) { - var newAnalysis = waFactory.Create(ww, wgFactory); - newAnalysis.CategoryRA = info.Pos; - // Not all entries have senses. - if (info.Sense != null) - { - // copy all the gloss alternatives from the sense into the word gloss. - IWfiGloss wg = newAnalysis.MeaningsOC.First(); - wg.Form.MergeAlternatives(info.Sense.Gloss); - } - var wmb = wmbFactory.Create(); - newAnalysis.MorphBundlesOS.Add(wmb); - if (info.Form != null) - wmb.MorphRA = info.Form; - if (info.Msa != null) - wmb.MsaRA = info.Msa; - if (info.Sense != null) - wmb.SenseRA = info.Sense; - - // Now, set up an approved "Computer" evaluation of this generated analysis - computerAgent.SetEvaluation(newAnalysis, Opinions.approves); - }); - } - } - } - - void LoadGuessTable() - { - GuessTable = new Dictionary(); - - HashSet analysesRemaining; - HashSet glossesRemaining; - AddOccurrenceApprovedAnalysesAndGlossesToGuessTable(out analysesRemaining, out glossesRemaining); - - // add any remaining Human approved analyses. - AddRemainingNonDisapprovedGlossesAndAnalysesToGuessTable(glossesRemaining, analysesRemaining, - IsHumanApproved); - - // next go through any (Parser) generated analyses and glosses. - AddRemainingNonDisapprovedGlossesAndAnalysesToGuessTable(glossesRemaining, analysesRemaining, - IsParserApprovedAndNotDisapproved); - - // lastly, add any remaining approved analyses/glosses (e.g. Computer guesses) - AddRemainingNonDisapprovedGlossesAndAnalysesToGuessTable(glossesRemaining, analysesRemaining, - IsNotDisapproved); - - } - - /// - /// Whenever the data we depend upon changes, use this to make sure we load the latest Guess data. - /// - public void ClearGuessData() - { - GuessTable = null; - ParserApprovedTable = null; - ComputerApprovedTable = null; - AnalysisApprovalTable = null; - } - - /// - /// adds analyses/glosses that have been approved in a text. - /// - /// analyses that were not processed by this routine - /// glosses that were not processed by this routine - private void AddOccurrenceApprovedAnalysesAndGlossesToGuessTable( - out HashSet analysesRemaining, - out HashSet glossesRemaining) - { - // keep track of the analyses we've made a decision about whether to load into the GuessTable. - analysesRemaining = new HashSet(Cache.ServiceLocator.GetInstance().AllInstances()); - glossesRemaining = new HashSet(Cache.ServiceLocator.GetInstance().AllInstances()); - foreach (var wag in GetAllAnalysesAndGlossesOrderedByFrequencyOfUseInText()) - { - if (wag is IWfiAnalysis) - { - // since an occurrence has an instanceOf this analysis - GuessTable[wag.Wordform] = wag; - analysesRemaining.Remove(wag.Analysis); - } - if (wag is IWfiGloss) - { - GuessTable[wag.Wordform] = wag; - GuessTable[wag.Analysis] = wag; - glossesRemaining.Remove(wag as IWfiGloss); - analysesRemaining.Remove(wag.Analysis); + // copy all the gloss alternatives from the sense into the word gloss. + IWfiGloss wg = newAnalysis.MeaningsOC.First(); + wg.Form.MergeAlternatives(info.Sense.Gloss); + } + var wmb = wmbFactory.Create(); + newAnalysis.MorphBundlesOS.Add(wmb); + if (info.Form != null) + wmb.MorphRA = info.Form; + if (info.Msa != null) + wmb.MsaRA = info.Msa; + if (info.Sense != null) + wmb.SenseRA = info.Sense; + + // Now, set up an approved "Computer" evaluation of this generated analysis + computerAgent.SetEvaluation(newAnalysis, Opinions.approves); + // Clear GuessTable entries. + if (GuessTable.ContainsKey(ww)) + GuessTable.Remove(ww); + if (CaselessGuessTable.ContainsKey(ww)) + CaselessGuessTable.Remove(ww); + }); } } } - - private delegate bool AddToGuessTableCondition( - IDictionary guessMap, IAnalysis candidate); - - - bool IsHumanApproved(IDictionary guessMap, IAnalysis candidate) - { - return !guessMap.Keys.Contains(candidate.Wordform) && IsHumanApproved(candidate.Analysis); - } - - bool IsParserApprovedAndNotDisapproved(IDictionary guessMap, IAnalysis candidate) - { - return !guessMap.Keys.Contains(candidate.Wordform) && IsNotDisapproved(candidate.Analysis) && IsParserApproved(candidate.Analysis); - } - - bool IsNotDisapproved(IDictionary guessMap, IAnalysis candidate) - { - return !guessMap.Keys.Contains(candidate.Wordform) && IsNotDisapproved(candidate.Analysis); - } - - private void AddRemainingNonDisapprovedGlossesAndAnalysesToGuessTable( - HashSet glossesRemaining, - HashSet analysesRemaining, - AddToGuessTableCondition accept) - { - IDictionary tmpGuesses = new Dictionary(); - foreach (var wg in glossesRemaining) - { - // approved analyses have precendence over "no opinion". - if (accept(tmpGuesses, wg)) - { - tmpGuesses[wg.Wordform] = wg; - tmpGuesses[wg.Analysis] = wg; - } - } - foreach (var wa in analysesRemaining) - { - // approved analyses have precendence over "no opinion". - if (accept(tmpGuesses, wa)) - { - tmpGuesses[wa.Wordform] = wa; - } - } - foreach (var pair in tmpGuesses) - { - // don't overwrite any existing mapping from texts - if (!GuessTable.Keys.Contains(pair.Key)) - GuessTable.Add(pair); - if (pair.Value is IWfiGloss) - glossesRemaining.Remove(pair.Value as IWfiGloss); - analysesRemaining.Remove(pair.Value.Analysis); - } - } - - /// - /// Given a wordform, provide the best analysis guess for it (using the default vernacular WS). - /// - /// - /// - public IAnalysis GetBestGuess(IWfiWordform wf) - { - return GetBestGuess(wf, wf.Cache.DefaultVernWs); - } - - /// - /// Given a wf provide the best guess based on the user-approved analyses (in or outside of texts). - /// If we don't already have a guess, this will try to create one from the lexicon, based on the - /// form in the specified WS. - /// - public IAnalysis GetBestGuess(IWfiWordform wf, int ws) - { - IAnalysis wag; - if (GuessTable.TryGetValue(wf, out wag)) - return wag; - if (wf.AnalysesOC.Count == 0) - { - GenerateEntryGuesses(wf, ws); - if (GuessTable.TryGetValue(wf, out wag)) - return wag; - } - return new NullWAG(); - } - - /// - /// Given a wa provide the best guess based on glosses for that analysis (made in or outside of texts). - /// - /// - /// - public IAnalysis GetBestGuess(IWfiAnalysis wa) - { - IAnalysis wag; - if (GuessTable.TryGetValue(wa, out wag)) - return wag; - return new NullWAG(); - } - - /// - /// This guess factors in the placement of an occurrence in its segment for making other - /// decisions like matching lowercase alternatives for sentence initial occurrences. - /// - /// - /// True: Do lowercase matching only if the occurrence index is zero. - /// False: Do lowercase matching regardless of the occurrence index. - /// - public IAnalysis GetBestGuess(AnalysisOccurrence occurrence, bool onlyIndexZeroLowercaseMatching = true) - { - // first see if we can make a guess based on the lowercase form of a sentence initial (non-lowercase) wordform - // TODO: make it look for the first word in the sentence...may not be at Index 0! - if (occurrence.Analysis is IWfiWordform && (!onlyIndexZeroLowercaseMatching || occurrence.Index == 0)) - { - ITsString tssWfBaseline = occurrence.BaselineText; - var cf = new CaseFunctions(Cache.ServiceLocator.WritingSystemManager.Get(tssWfBaseline.get_WritingSystemAt(0))); - string sLower = cf.ToLower(tssWfBaseline.Text); - // don't bother looking up the lowercased wordform if the instanceOf is already in lowercase form. - if (sLower != tssWfBaseline.Text) - { - ITsString tssLower = TsStringUtils.MakeString(sLower, TsStringUtils.GetWsAtOffset(tssWfBaseline, 0)); - IWfiWordform lowercaseWf; - if (Cache.ServiceLocator.GetInstance().TryGetObject(tssLower, out lowercaseWf)) - { - IAnalysis bestGuess; - if (TryGetBestGuess(lowercaseWf, occurrence.BaselineWs, out bestGuess)) - return bestGuess; - } - } - } - if (occurrence.BaselineWs == -1) - return null; // happens with empty translation lines - return GetBestGuess(occurrence.Analysis, occurrence.BaselineWs); - } - - private IAnalysis GetBestGuess(IAnalysis wag, int ws) - { - if (wag is IWfiWordform) - return GetBestGuess(wag.Wordform, ws); - if (wag is IWfiAnalysis) - return GetBestGuess(wag.Analysis); - return new NullWAG(); - } - - /// - /// - /// - public bool TryGetBestGuess(IAnalysis wag, int ws, out IAnalysis bestGuess) - { - bestGuess = GetBestGuess(wag, ws); - return !(bestGuess is NullWAG); - } - - /// - /// - /// - /// - /// True: Do lowercase matching only if the occurrence index is zero. - /// False: Do lowercase matching regardless of the occurrence index. - /// - public bool TryGetBestGuess(AnalysisOccurrence occurrence, out IAnalysis bestGuess, bool onlyIndexZeroLowercaseMatching = true) - { - bestGuess = GetBestGuess(occurrence, onlyIndexZeroLowercaseMatching); - return !(bestGuess is NullWAG); - } + #endregion GenerateEntryGuesses } } diff --git a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs index c286f24f..f17fa011 100644 --- a/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs +++ b/tests/SIL.LCModel.Tests/DomainServices/AnalysisGuessServicesTests.cs @@ -3,10 +3,12 @@ // (http://www.gnu.org/licenses/lgpl-2.1.html) using System.Collections.Generic; +using System.IO; using System.Linq; using NUnit.Framework; using SIL.LCModel.Core.Text; using SIL.LCModel.DomainImpl; +using SIL.LCModel.Infrastructure; using SIL.ObjectModel; namespace SIL.LCModel.DomainServices @@ -110,6 +112,39 @@ internal void DoDataSetup() " " + Words_para0[4].Form.BestVernacularAlternative.Text + " " + Words_para0[5].Form.BestVernacularAlternative.Text + ".", wsVern)); Para0.Contents = bldr.GetString(); + /* c c c c d c d c d c d c, c. */ + IWfiWordform c = wfFactory.Create(TsStringUtils.MakeString("c", wsVern)); + IWfiWordform d = wfFactory.Create(TsStringUtils.MakeString("d", wsVern)); + Words_para0.Add(c); + Words_para0.Add(c); + Words_para0.Add(c); + Words_para0.Add(c); + Words_para0.Add(d); + Words_para0.Add(c); + Words_para0.Add(d); + Words_para0.Add(c); + Words_para0.Add(d); + Words_para0.Add(c); + Words_para0.Add(d); + Words_para0.Add(c); + Words_para0.Add(c); // after punctuation + var bldr2 = Para0.Contents.GetIncBldr(); + bldr2.AppendTsString(TsStringUtils.MakeString( + " " + Words_para0[6].Form.BestVernacularAlternative.Text + + " " + Words_para0[7].Form.BestVernacularAlternative.Text + + " " + Words_para0[8].Form.BestVernacularAlternative.Text + + " " + Words_para0[9].Form.BestVernacularAlternative.Text + + " " + Words_para0[10].Form.BestVernacularAlternative.Text + + " " + Words_para0[11].Form.BestVernacularAlternative.Text + + " " + Words_para0[12].Form.BestVernacularAlternative.Text + + " " + Words_para0[13].Form.BestVernacularAlternative.Text + + " " + Words_para0[14].Form.BestVernacularAlternative.Text + + " " + Words_para0[15].Form.BestVernacularAlternative.Text + + " " + Words_para0[16].Form.BestVernacularAlternative.Text + + " " + Words_para0[17].Form.BestVernacularAlternative.Text + + ", " + Words_para0[18].Form.BestVernacularAlternative.Text + + ".", wsVern)); + Para0.Contents = bldr2.GetString(); using (ParagraphParser pp = new ParagraphParser(Cache)) { foreach (IStTxtPara para in StText.ParagraphsOS) @@ -376,7 +411,6 @@ public void ExpectedGuessForWord_MatchingEntry() setup.EntryFactory.Create("a", "aroot", SandboxGenericMSA.Create(MsaType.kRoot, null)); // expect a guess to be generated - setup.GuessServices.GenerateEntryGuesses(setup.StText); var guessActual = setup.GuessServices.GetBestGuess(setup.Words_para0[1]); Assert.AreNotEqual(new NullWAG(), guessActual); Assert.AreEqual(newEntry4_expectedMatch.LexemeFormOA.Form.BestVernacularAlternative.Text, guessActual.Wordform.Form.BestVernacularAlternative.Text); @@ -489,6 +523,27 @@ public void ExpectedGuessForWord_MatchingVariantHavingSense() } } + /// + /// make generated entries for upper and lower case and return both for upper case word at beginning of sentence. + /// + [Test] + public void ExpectedGuessForWord_GuessUpperAndLowerGenerated() + { + using (var setup = new AnalysisGuessBaseSetup(Cache, + AnalysisGuessBaseSetup.Flags.PartsOfSpeech, AnalysisGuessBaseSetup.Flags.VariantEntryTypes)) + { + // create an affix entry + setup.EntryFactory.Create("a", "astem", SandboxGenericMSA.Create(MsaType.kStem, setup.Pos_noun)); + setup.EntryFactory.Create("A", "Astem", SandboxGenericMSA.Create(MsaType.kStem, setup.Pos_noun)); + AnalysisOccurrence occurrence = new AnalysisOccurrence(setup.Para0.SegmentsOS[0], 0); + // GenerateEntryGuesses implicitly gets called. + var sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(occurrence.Analysis.Wordform, occurrence); + Assert.AreEqual(2, sorted_analyses.Count); + Assert.AreEqual("A", sorted_analyses[0].Analysis.Wordform.ShortName); + Assert.AreEqual("a", sorted_analyses[1].Analysis.Wordform.ShortName); + } + } + /// /// make an approved analysis, expected to be a guess. /// @@ -749,7 +804,7 @@ public void ExpectedAnalysisGuess_MultipleAnalyses_PreferAnalysisInText() /// - /// If a wordform is in a sentence initial position (and non-lowercase), prefer a guess for + /// If a wordform is in a sentence initial position (and non-lowercase), consider /// the lowercase form. /// [Test] @@ -757,10 +812,20 @@ public void ExpectedAnalysisGuess_ForSentenceInitialPositionLowerCaseAlternative { using (var setup = new AnalysisGuessBaseSetup(Cache)) { - WordAnalysisOrGlossServices.CreateNewAnalysisTreeGloss(setup.Words_para0[0]); + var newWagUppercase = WordAnalysisOrGlossServices.CreateNewAnalysisTreeGloss(setup.Words_para0[0]); var newWagLowercase = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(setup.Words_para0[1]); - var wagUppercase = new AnalysisOccurrence(setup.Para0.SegmentsOS[0], 0); - var guessActual = setup.GuessServices.GetBestGuess(wagUppercase); + var uppercaseOccurrence = new AnalysisOccurrence(setup.Para0.SegmentsOS[0], 0); + // There should be two possible analyses: one uppercase and one lowercase. + var wordform = uppercaseOccurrence.Analysis.Wordform; + var analyses = setup.GuessServices.GetSortedAnalysisGuesses(wordform, uppercaseOccurrence); + Assert.AreEqual(analyses.Count, 2); + // All else being equal, prefer the uppercase analysis. + var guessActual = setup.GuessServices.GetBestGuess(uppercaseOccurrence); + Assert.AreEqual(newWagUppercase.Analysis, guessActual); + // If the lowercase has been selected, prefer the lowercase analysis. + setup.Para0.SetAnalysis(0, 1, newWagLowercase); + setup.GuessServices.ClearGuessData(); + guessActual = setup.GuessServices.GetBestGuess(uppercaseOccurrence); Assert.AreEqual(newWagLowercase.Analysis, guessActual); } @@ -907,6 +972,25 @@ public void ExpectedAnalysisGuessForAnalysis_PreferOneGlossOverOneAnalysis() /// [Test] public void ExpectedAnalysisGuessForWord_PreferFrequentAnalysisOverLessFrequentGloss() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var newWagApproves = WordAnalysisOrGlossServices.CreateNewAnalysisTreeGloss(setup.Words_para0[1]); + var newWagApproves2 = WordAnalysisOrGlossServices.CreateNewAnalysisTreeGloss(setup.Words_para0[1]); + setup.Para0.SetAnalysis(0, 1, newWagApproves2.Gloss); + setup.Para0.SetAnalysis(0, 2, newWagApproves.WfiAnalysis); + setup.Para0.SetAnalysis(0, 3, newWagApproves.WfiAnalysis); + setup.UserAgent.SetEvaluation(newWagApproves.WfiAnalysis, Opinions.approves); + var guessActual = setup.GuessServices.GetBestGuess(setup.Words_para0[1]); + Assert.AreEqual(newWagApproves.Analysis, guessActual); + } + } + + /// + /// + /// + [Test] + public void ExpectedAnalysisGuessForWord_GetMostCommonGlossOfMostCommonAnalysis() { using (var setup = new AnalysisGuessBaseSetup(Cache)) { @@ -916,7 +1000,7 @@ public void ExpectedAnalysisGuessForWord_PreferFrequentAnalysisOverLessFrequentG setup.Para0.SetAnalysis(0, 3, newWagApproves.WfiAnalysis); setup.UserAgent.SetEvaluation(newWagApproves.WfiAnalysis, Opinions.approves); var guessActual = setup.GuessServices.GetBestGuess(setup.Words_para0[1]); - Assert.AreEqual(newWagApproves.WfiAnalysis, guessActual); + Assert.AreEqual(newWagApproves.Gloss, guessActual); } } @@ -1010,6 +1094,314 @@ public void ExpectedGuess_PreferUserApprovedAnalysisOverParserApprovedAnalysis() Assert.AreEqual(newWagHumanApproves.Analysis, guessActual); } } - } + /// + /// Prefer analyses that are in the right context over analyses that are not. + /// + [Test] + public void ExpectedContextAwareGuess_PreferContextedOverUncontexted() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[2]; + var uncontextedApprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var contextedApprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[5].Wordform).Analysis; + // Analyses must be set in order. + setup.Para0.SetAnalysis(2, 0, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 1, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 2, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 3, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 4, dAnalysis); // "d" + setup.Para0.SetAnalysis(2, 5, contextedApprovedAnalysis); // "c" + // Verify uncontexted guess. + var wordform = segment.AnalysesRS[7].Wordform; + var guessActual = setup.GuessServices.GetBestGuess(wordform); + Assert.AreEqual(uncontextedApprovedAnalysis, guessActual); + AnalysisOccurrence occurrence = new AnalysisOccurrence(segment, 7); + // Make sure we get a contexted guess for occurrence instead of an uncontexted guess. + guessActual = setup.GuessServices.GetBestGuess(occurrence); + Assert.AreEqual(contextedApprovedAnalysis, guessActual); + // Verify uncontexted guess for sort. + var sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(wordform, wordform.Cache.DefaultVernWs); + Assert.AreEqual(2, sorted_analyses.Count); + Assert.AreEqual(uncontextedApprovedAnalysis, sorted_analyses[0]); + Assert.AreEqual(contextedApprovedAnalysis, sorted_analyses[1]); + // Make sure the contexted guess is prioritized. + sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(wordform, occurrence); + Assert.AreEqual(2, sorted_analyses.Count); + Assert.AreEqual(contextedApprovedAnalysis, sorted_analyses[0]); + Assert.AreEqual(uncontextedApprovedAnalysis, sorted_analyses[1]); + } + } + + /// + /// Prefer analyses that are approved more often in the right context. + /// + [Test] + public void ExpectedContextAwareGuess_PreferTwoContextedApprovedOverOneContextedApproved() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[2]; + var uncontextedApprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var approvedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[5].Wordform).Analysis; + var approvedAnalysis2 = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[7].Wordform).Analysis; + // Analyses must be set in order. + // Add uncontexted analyses as a distractor. + setup.Para0.SetAnalysis(2, 0, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 1, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 2, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 3, uncontextedApprovedAnalysis); // "c" + // Set up test. + setup.Para0.SetAnalysis(2, 4, dAnalysis); // "d" + setup.Para0.SetAnalysis(2, 5, approvedAnalysis.Analysis); // "c" + setup.Para0.SetAnalysis(2, 7, approvedAnalysis2.Analysis); // "c" + setup.Para0.SetAnalysis(2, 9, approvedAnalysis2.Analysis); // "c" + // Check guess for occurrence. + AnalysisOccurrence occurrence = new AnalysisOccurrence(segment, 11); + var guessActual = setup.GuessServices.GetBestGuess(occurrence); + Assert.AreEqual(approvedAnalysis2.Analysis, guessActual); + // Check sorted analyses. + var wordform = segment.AnalysesRS[11].Wordform; + var sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(wordform, occurrence); + Assert.AreEqual(3, sorted_analyses.Count); + Assert.AreEqual(approvedAnalysis2, sorted_analyses[0]); + Assert.AreEqual(approvedAnalysis, sorted_analyses[1]); + Assert.AreEqual(uncontextedApprovedAnalysis, sorted_analyses[2]); + } + } + + /// + /// Prefer analyses that are approved in the right context over analyses that are human approved. + /// + [Test] + public void ExpectedContextAwareGuess_PreferContextedApprovedOverHumanApproved() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[2]; + var uncontextedApprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var approvedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[5].Wordform).Analysis; + var humanApprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[7].Wordform).Analysis; + // Analyses must be set in order. + // Add uncontexted analyses as a distractor. + setup.Para0.SetAnalysis(2, 0, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 1, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 2, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 3, uncontextedApprovedAnalysis); // "c" + // Set up test. + setup.Para0.SetAnalysis(2, 4, dAnalysis); // "d" + setup.Para0.SetAnalysis(2, 5, approvedAnalysis.Analysis); // "c" + setup.UserAgent.SetEvaluation(humanApprovedAnalysis, Opinions.approves); // "c" + // Check guess for occurrence. + AnalysisOccurrence occurrence = new AnalysisOccurrence(segment, 11); + var guessActual = setup.GuessServices.GetBestGuess(occurrence); + Assert.AreEqual(approvedAnalysis.Analysis, guessActual); + // Check sorted analyses. + var wordform = segment.AnalysesRS[11].Wordform; + var sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(wordform, occurrence); + Assert.AreEqual(3, sorted_analyses.Count); + Assert.AreEqual(approvedAnalysis, sorted_analyses[0]); + Assert.AreEqual(uncontextedApprovedAnalysis, sorted_analyses[1]); + Assert.AreEqual(humanApprovedAnalysis, sorted_analyses[2]); + } + } + + /// + /// Prefer analyses that are approved in the right context over analyses that are parser approved. + /// + [Test] + public void ExpectedContextAwareGuess_PreferContextedApprovedOverParserApproved() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[2]; + var uncontextedApprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var approvedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[5].Wordform).Analysis; + var parserApprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[7].Wordform).Analysis; + // Analyses must be set in order. + // Add uncontexted analyses as a distractor. + setup.Para0.SetAnalysis(2, 0, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 1, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 2, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 3, uncontextedApprovedAnalysis); // "c" + // Set up test. + setup.Para0.SetAnalysis(2, 4, dAnalysis); // "d" + setup.Para0.SetAnalysis(2, 5, approvedAnalysis.Analysis); // "c" + setup.ParserAgent.SetEvaluation(parserApprovedAnalysis, Opinions.approves); // "c" + // Check guess for occurrence. + AnalysisOccurrence occurrence = new AnalysisOccurrence(segment, 11); + var guessActual = setup.GuessServices.GetBestGuess(occurrence); + Assert.AreEqual(approvedAnalysis.Analysis, guessActual); + // Check sorted analyses. + var wordform = segment.AnalysesRS[11].Wordform; + var sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(wordform, occurrence); + Assert.AreEqual(3, sorted_analyses.Count); + Assert.AreEqual(approvedAnalysis, sorted_analyses[0]); + Assert.AreEqual(uncontextedApprovedAnalysis, sorted_analyses[1]); + Assert.AreEqual(parserApprovedAnalysis, sorted_analyses[2]); + } + } + + /// + /// Prefer analyses that are approved in the right context over analyses that are not approved. + /// + [Test] + public void ExpectedContextAwareGuess_PreferContextedApprovedOverUnapproved() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[2]; + var uncontextedApprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var approvedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[5].Wordform).Analysis; + var unapprovedAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[7].Wordform).Analysis; + // Analyses must be set in order. + // Add uncontexted analyses as a distractor. + setup.Para0.SetAnalysis(2, 0, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 1, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 2, uncontextedApprovedAnalysis); // "c" + setup.Para0.SetAnalysis(2, 3, uncontextedApprovedAnalysis); // "c" + // Set up test. + setup.Para0.SetAnalysis(2, 4, dAnalysis); // "d" + setup.Para0.SetAnalysis(2, 5, approvedAnalysis.Analysis); // "c" + // Check guess for occurrence. + AnalysisOccurrence occurrence = new AnalysisOccurrence(segment, 11); + var guessActual = setup.GuessServices.GetBestGuess(occurrence); + Assert.AreEqual(approvedAnalysis.Analysis, guessActual); + // Check sorted analyses. + var wordform = segment.AnalysesRS[11].Wordform; + var sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(wordform, occurrence); + Assert.AreEqual(3, sorted_analyses.Count); + Assert.AreEqual(approvedAnalysis, sorted_analyses[0]); + Assert.AreEqual(uncontextedApprovedAnalysis, sorted_analyses[1]); + Assert.AreEqual(unapprovedAnalysis, sorted_analyses[2]); + } + } + + /// + /// GetBestGuess should equal GetSortedAnalyses[0]. + /// + [Test] + public void ExpectedContextAwareGuess_CheckGuessWithSorted() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[2]; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var approvedAnalysis1 = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[0].Wordform).Analysis; + var approvedAnalysis2 = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var approvedAnalysis3 = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[2].Wordform).Analysis; + var approvedAnalysis4 = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[3].Wordform).Analysis; + var approvedAnalysis5 = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[5].Wordform).Analysis; + var approvedAnalysis6 = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[7].Wordform).Analysis; + var approvedAnalysis7 = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[9].Wordform).Analysis; + // Analyses must be set in order. + // Create analyses with equal priority. + setup.Para0.SetAnalysis(2, 0, approvedAnalysis7.Analysis); // "c" + setup.Para0.SetAnalysis(2, 1, approvedAnalysis6.Analysis); // "c" + setup.Para0.SetAnalysis(2, 2, approvedAnalysis5.Analysis); // "c" + setup.Para0.SetAnalysis(2, 3, approvedAnalysis4.Analysis); // "c" + setup.Para0.SetAnalysis(2, 4, dAnalysis); // "d" + setup.Para0.SetAnalysis(2, 5, approvedAnalysis3.Analysis); // "c" + setup.Para0.SetAnalysis(2, 7, approvedAnalysis2.Analysis); // "c" + setup.Para0.SetAnalysis(2, 9, approvedAnalysis1.Analysis); // "c" + // Check guess with sorted. + var wordform = segment.AnalysesRS[11].Wordform; + var guessActual = setup.GuessServices.GetBestGuess(wordform); + var sorted_analyses = setup.GuessServices.GetSortedAnalysisGuesses(wordform, wordform.Cache.DefaultVernWs); + Assert.AreEqual(guessActual, sorted_analyses[0]); + } + } + + /// + /// Prefer gloss based on previous word ("river bank" vs. "financial bank"). + /// + [Test] + public void ExpectedContextAwareGloss_PreferContextedOverUncontexted() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[2]; + var servLoc = segment.Cache.ServiceLocator; + var glossFactory = servLoc.GetInstance(); + var analysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var uncontextedApprovedGloss = glossFactory.Create(); + var contextedApprovedGloss = glossFactory.Create(); + analysis.MeaningsOC.Add(uncontextedApprovedGloss); + analysis.MeaningsOC.Add(contextedApprovedGloss); + // Analyses must be set in order. + setup.Para0.SetAnalysis(2, 0, uncontextedApprovedGloss); // "c" + setup.Para0.SetAnalysis(2, 1, uncontextedApprovedGloss); // "c" + setup.Para0.SetAnalysis(2, 2, uncontextedApprovedGloss); // "c" + setup.Para0.SetAnalysis(2, 3, uncontextedApprovedGloss); // "c" + setup.Para0.SetAnalysis(2, 4, dAnalysis); // "d" + setup.Para0.SetAnalysis(2, 5, contextedApprovedGloss); // "c" + // Verify uncontexted guess. + var wordform = segment.AnalysesRS[11].Wordform; + var guessActual = setup.GuessServices.GetBestGuess(wordform); + Assert.AreEqual(uncontextedApprovedGloss, guessActual); + AnalysisOccurrence occurrence = new AnalysisOccurrence(segment, 11); + // Make sure we get a contexted guess for occurrence instead of an uncontexted guess. + guessActual = setup.GuessServices.GetBestGuess(occurrence); + Assert.AreEqual(contextedApprovedGloss, guessActual); + // Verify uncontexted guess for sort. + var sorted_glosses = setup.GuessServices.GetSortedGlossGuesses(analysis); + Assert.AreEqual(2, sorted_glosses.Count); + Assert.AreEqual(uncontextedApprovedGloss, sorted_glosses[0]); + Assert.AreEqual(contextedApprovedGloss, sorted_glosses[1]); + // Make sure the contexted guess is prioritized. + sorted_glosses = setup.GuessServices.GetSortedGlossGuesses(analysis, occurrence); + Assert.AreEqual(2, sorted_glosses.Count); + Assert.AreEqual(contextedApprovedGloss, sorted_glosses[0]); + Assert.AreEqual(uncontextedApprovedGloss, sorted_glosses[1]); + } + } + + /// + /// Prefer glosses that are approved more often in the right context. + /// + [Test] + public void ExpectedContextAwareGloss_PreferTwoContextedOverOneContexted() + { + using (var setup = new AnalysisGuessBaseSetup(Cache)) + { + var segment = setup.Para0.SegmentsOS[2]; + var servLoc = segment.Cache.ServiceLocator; + var glossFactory = servLoc.GetInstance(); + var analysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[1].Wordform).Analysis; + var dAnalysis = WordAnalysisOrGlossServices.CreateNewAnalysisWAG(segment.AnalysesRS[4].Wordform).Analysis; + var uncontextedApprovedGloss = glossFactory.Create(); + var contextedApprovedGloss1 = glossFactory.Create(); + var contextedApprovedGloss2 = glossFactory.Create(); + analysis.MeaningsOC.Add(uncontextedApprovedGloss); + analysis.MeaningsOC.Add(contextedApprovedGloss1); + analysis.MeaningsOC.Add(contextedApprovedGloss2); + // Analyses must be set in order. + setup.Para0.SetAnalysis(2, 0, uncontextedApprovedGloss); // "c" + setup.Para0.SetAnalysis(2, 1, uncontextedApprovedGloss); // "c" + setup.Para0.SetAnalysis(2, 2, uncontextedApprovedGloss); // "c" + setup.Para0.SetAnalysis(2, 3, uncontextedApprovedGloss); // "c" + setup.Para0.SetAnalysis(2, 4, dAnalysis); // "d" + setup.Para0.SetAnalysis(2, 5, contextedApprovedGloss1); // "c" + setup.Para0.SetAnalysis(2, 7, contextedApprovedGloss2); // "c" + setup.Para0.SetAnalysis(2, 9, contextedApprovedGloss2); // "c" + AnalysisOccurrence occurrence = new AnalysisOccurrence(segment, 11); + // Check guess. + var guessActual = setup.GuessServices.GetBestGuess(occurrence); + Assert.AreEqual(contextedApprovedGloss2, guessActual); + // Check sorting. + var sorted_glosses = setup.GuessServices.GetSortedGlossGuesses(analysis, occurrence); + Assert.AreEqual(3, sorted_glosses.Count); + Assert.AreEqual(contextedApprovedGloss2, sorted_glosses[0]); + Assert.AreEqual(contextedApprovedGloss1, sorted_glosses[1]); + Assert.AreEqual(uncontextedApprovedGloss, sorted_glosses[2]); + } + } + } }