Skip to content

Commit

Permalink
Index dictionary by first grapheme instead of first char
Browse files (browse the repository at this point in the history)
  • Loading branch information
myieye committed Jun 20, 2024
1 parent 2a7692b commit 614dbd5
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
4 changes: 2 additions & 2 deletions backend/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ internal void CompleteExemplars(WritingSystems writingSystems)
{
var wsExemplars = writingSystems.Vernacular.Concat(writingSystems.Analysis)
.Distinct()
.ToDictionary(ws => ws, ws => ws.Exemplars.Select(s => s[0]).ToHashSet());
.ToDictionary(ws => ws, ws => ws.Exemplars.ToHashSet());
var wsExemplarsByHandle = wsExemplars.ToFrozenDictionary(kv => GetWritingSystemHandle(kv.Key.Id), kv => kv.Value);

foreach (var entry in _entriesRepository.AllInstances())
Expand All @@ -149,7 +149,7 @@ internal void CompleteExemplars(WritingSystems writingSystems)

foreach (var ws in wsExemplars.Keys)
{
ws.Exemplars = [.. wsExemplars[ws].Order().Select(s => s.ToString())];
ws.Exemplars = [.. wsExemplars[ws].Order()];
}
}

Expand Down
18 changes: 10 additions & 8 deletions backend/FwDataMiniLcmBridge/Api/LcmHelpers.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System.Globalization;
using SIL.LCModel.Core.KernelInterfaces;

namespace FwDataMiniLcmBridge.Api;
Expand Down Expand Up @@ -38,32 +39,33 @@ internal static bool SearchValue(this ITsMultiString multiString, string value)
'\u200C', // Zero Width Non-Joiner
'\u200D', // Zero Width Joiner
'\u200E', // Left-to-Right Mark
'\u200F', // Right-to-Left Mark
'\u200F', // Right-to-Left Mark
'\u2028', // Line Separator
'\u2029', // Paragraph Separator
'\u202F', // Narrow No-Break Space
'\u205F', // Medium Mathematical Space
'\u3000', // Ideographic Space
'\u3000', // Ideographic Space
'\uFEFF', // Zero Width No-Break Space / BOM
];

internal static readonly char[] WhitespaceAndFormattingChars =
[
.. WhitespaceChars,
'\u0640', // Arabic Tatweel
'\u0640'.ToString().Normalize(System.Text.NormalizationForm.FormD)[0], // Arabic Tatweel
];

internal static void ContributeExemplars(ITsMultiString multiString, IReadOnlyDictionary<int, HashSet<char>> wsExemplars)
internal static void ContributeExemplars(ITsMultiString multiString, IReadOnlyDictionary<int, HashSet<string>> wsExemplars)
{
for (var i = 0; i < multiString.StringCount; i++)
{
var tsString = multiString.GetStringFromIndex(i, out var ws);
if (string.IsNullOrEmpty(tsString.Text)) continue;
var value = tsString.Text.AsSpan().Trim(WhitespaceAndFormattingChars);
if (!value.IsEmpty && wsExemplars.TryGetValue(ws, out var exemplars))
var value = new StringInfo(tsString.Text).SubstringByTextElements(0, 1)
// in some cases we need to trim things both before and after the grapheme
.Trim(WhitespaceAndFormattingChars);
if (value.Length > 0 && wsExemplars.TryGetValue(ws, out var exemplars))
{
//todo should we upper or lowercase the value?
exemplars.Add(value[0]);
exemplars.Add(value.ToUpper());
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion frontend/viewer/src/ProjectView.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@
function selectEntry(entry: IEntry) {
$selectedEntry = entry;
$selectedIndexExemplar = headword(entry).charAt(0).toLocaleUpperCase() || undefined;
const indexChar: string | undefined = new Intl.Segmenter().segment(headword(entry))[Symbol.iterator]().next()?.value?.segment;
$selectedIndexExemplar = indexChar?.toLocaleUpperCase() ?? undefined;
refreshEntries();
pickedEntry = true;
}
Expand Down

0 comments on commit 614dbd5

Please sign in to comment.