Skip to content

Commit

Permalink
Merge branch 'main' into add_serval_release_version
Browse files Browse the repository at this point in the history
  • Loading branch information
mudiagaobrikisil authored Oct 18, 2024
2 parents 937c563 + bdf43fa commit 7b1172a
Show file tree
Hide file tree
Showing 17 changed files with 388 additions and 89 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
build:
name: Build
runs-on: ubuntu-latest
timeout-minutes: 45
timeout-minutes: 60

env:
SERVAL_CLIENT_ID: ${{ secrets.SERVAL_CLIENT_ID }}
Expand Down
4 changes: 2 additions & 2 deletions deploy/qa-ext-values.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
externalHost: qa.serval-api.org
environment: Production
deploymentVersion: '1.6.QA3'
deploymentVersion: '1.7.QA0'
alertEmail: ext-qa-serval-alerts@languagetechnology.org
emailsToAlert: john_lambert@sil.org
enableTls: true
namespace: serval
auth0Domain: dev-sillsdev.auth0.com
lokiTenent: serval-tenant
lokiUrl: http://loki-distributed-gateway.loki.svc.cluster.local
servalImage: ghcr.io/sillsdev/serval:1.6.3
servalImage: ghcr.io/sillsdev/serval:1.7.0
ClearMLDockerImage: ghcr.io/sillsdev/machine.py:1.6.3
ClearMLQueue: production
MongoConnectionPrefix: qa_
Expand Down
2 changes: 1 addition & 1 deletion deploy/qa-int-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace: nlp
auth0Domain: sil-appbuilder.auth0.com
lokiTenent: nlp-tenant
lokiUrl: http://loki-distributed-gateway.loki.svc.cluster.local
servalImage: ghcr.io/sillsdev/serval:1.6.1
servalImage: ghcr.io/sillsdev/serval:1.7.0
ClearMLDockerImage: ghcr.io/sillsdev/machine.py:1.6.3
ClearMLQueue: lambert_24gb
MongoConnectionPrefix: qa_int_
Expand Down
3 changes: 0 additions & 3 deletions src/Echo/src/EchoTranslationEngine/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@

WebApplication app = builder.Build();

// Configure the HTTP request pipeline.
app.UseHttpsRedirection();

app.MapGrpcService<TranslationEngineServiceV1>();
app.MapGrpcService<HealthServiceV1>();

Expand Down
12 changes: 10 additions & 2 deletions src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,15 +85,23 @@ await client.BuildStartedAsync(
var sourceFiles = corpus
.SourceCorpora.SelectMany(sc =>
sc.Files.Where(f =>
(sc.PretranslateTextIds is null || sc.PretranslateTextIds.Contains(f.TextId))
(
sc.PretranslateAll
|| sc.PretranslateTextIds is null
|| sc.PretranslateTextIds.Contains(f.TextId)
)
&& f.Format == FileFormat.Text
)
)
.ToDictionary(f => f.TextId, f => f.Location);
var targetFiles = corpus
.TargetCorpora.SelectMany(tc =>
tc.Files.Where(f =>
(tc.PretranslateTextIds is null || tc.PretranslateTextIds.Contains(f.TextId))
(
tc.PretranslateAll
|| tc.PretranslateTextIds is null
|| tc.PretranslateTextIds.Contains(f.TextId)
)
&& f.Format == FileFormat.Text
)
)
Expand Down
2 changes: 0 additions & 2 deletions src/Machine/src/Serval.Machine.EngineServer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@

var app = builder.Build();

app.UseHttpsRedirection();

app.MapServalTranslationEngineService();
app.MapHangfireDashboard();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@
<PackageReference Include="Hangfire.Mongo" Version="1.10.8" />
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="8.0.8" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="8.0.8" />
<PackageReference Include="SIL.Machine" Version="3.2.8" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
<PackageReference Include="SIL.Machine.Morphology.HermitCrab" Version="3.2.8" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj')" />
<PackageReference Include="SIL.Machine.Translation.Thot" Version="3.2.8" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj')" />
<PackageReference Include="SIL.Machine" Version="3.4.0" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
<PackageReference Include="SIL.Machine.Morphology.HermitCrab" Version="3.4.0" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Morphology.HermitCrab\SIL.Machine.Morphology.HermitCrab.csproj')" />
<PackageReference Include="SIL.Machine.Translation.Thot" Version="3.4.0" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine.Translation.Thot\SIL.Machine.Translation.Thot.csproj')" />
<PackageReference Include="SIL.WritingSystems" Version="14.1.1" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,22 +238,27 @@ row.Ref is not ScriptureRef sr
}
}
}
// Emits one pretranslation record to the given JSON writer as an object with the
// properties corpusId, textId, refs and translation (in that order), then bumps the
// running pretranslation count.
void WriteRow(Utf8JsonWriter writer, string textId, IReadOnlyList<object> refs, string translation)
{
    writer.WriteStartObject();

    // corpus is captured from the enclosing scope.
    writer.WriteString("corpusId", corpus.Id);
    writer.WriteString("textId", textId);

    // Every ref is serialized through its ToString() representation.
    writer.WriteStartArray("refs");
    for (int i = 0; i < refs.Count; i++)
    {
        writer.WriteStringValue(refs[i].ToString());
    }
    writer.WriteEndArray();

    writer.WriteString("translation", translation);
    writer.WriteEndObject();

    // pretranslateCount is also captured from the enclosing scope.
    pretranslateCount += 1;
}

ITextCorpus targetCorpus =
targetCorpora.Length > 0 ? targetCorpora[0].TextCorpus : new DictionaryTextCorpus();

foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpora, targetCorpora[0].TextCorpus))
foreach (Row row in AlignPretranslateCorpus(sourcePretranslateCorpora, targetCorpus))
{
if (row.SourceSegment.Length > 0)
{
pretranslateWriter.WriteStartObject();
pretranslateWriter.WriteString("corpusId", corpus.Id);
pretranslateWriter.WriteString("textId", row.TextId);
pretranslateWriter.WriteStartArray("refs");
foreach (object rowRef in row.Refs)
pretranslateWriter.WriteStringValue(rowRef.ToString());
pretranslateWriter.WriteEndArray();
pretranslateWriter.WriteString("translation", row.SourceSegment);
pretranslateWriter.WriteEndObject();
pretranslateCount++;
}
WriteRow(pretranslateWriter, row.TextId, row.Refs, row.SourceSegment);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,16 +286,20 @@ private static Models.MonolingualCorpus Map(Translation.V1.MonolingualCorpus sou
kvp => kvp.Value.Chapters.ToHashSet()
);
var trainOnTextIds = source.TrainOnTextIds.ToHashSet();
FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds);
FilterChoice trainingFilter = GetFilterChoice(trainOnChapters, trainOnTextIds, source.TrainOnAll);

var pretranslateChapters = source.PretranslateChapters.ToDictionary(
kvp => kvp.Key,
kvp => kvp.Value.Chapters.ToHashSet()
);
var pretranslateTextIds = source.PretranslateTextIds.ToHashSet();
FilterChoice pretranslateFilter = GetFilterChoice(pretranslateChapters, pretranslateTextIds);
FilterChoice pretranslateFilter = GetFilterChoice(
pretranslateChapters,
pretranslateTextIds,
source.PretranslateAll
);

return new Models.MonolingualCorpus
var corpus = new Models.MonolingualCorpus
{
Id = source.Id,
Language = source.Language,
Expand All @@ -305,6 +309,7 @@ private static Models.MonolingualCorpus Map(Translation.V1.MonolingualCorpus sou
PretranslateChapters = pretranslateFilter == FilterChoice.Chapters ? pretranslateChapters : null,
PretranslateTextIds = pretranslateFilter == FilterChoice.TextIds ? pretranslateTextIds : null
};
return corpus;
}

private static Models.CorpusFile Map(Translation.V1.CorpusFile source)
Expand All @@ -326,12 +331,13 @@ private enum FilterChoice

private static FilterChoice GetFilterChoice(
IReadOnlyDictionary<string, HashSet<int>> chapters,
HashSet<string> textIds
HashSet<string> textIds,
bool noFilter
)
{
// Only either textIds or Scripture Range will be used at a time
// TextIds may be an empty array, so prefer that if both are empty (which applies to both scripture and text)
if (chapters is null && textIds is null)
if (noFilter || (chapters is null && textIds is null))
return FilterChoice.None;
if (chapters is null || chapters.Count == 0)
return FilterChoice.TextIds;
Expand Down
2 changes: 1 addition & 1 deletion src/Serval/src/Serval.Client/Serval.Client.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<TargetFramework>netstandard2.1</TargetFramework>
<Version>1.5.0</Version>
<Version>1.7.0</Version>
<Description>Client classes for Serval.</Description>
<RootNamespace>Serval.Client</RootNamespace>
<Product>Serval</Product>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ message ParallelCorpus {
message MonolingualCorpus {
string id = 1;
string language = 2;
bool train_on_all = 3;
bool pretranslate_all = 4;
map<string, ScriptureChapters> train_on_chapters = 5;
map<string, ScriptureChapters> pretranslate_chapters = 6;
repeated string train_on_text_ids = 7;
Expand Down
2 changes: 1 addition & 1 deletion src/Serval/src/Serval.Shared/Serval.Shared.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
<PackageReference Include="Grpc.Core.Api" Version="2.65.0" />
<PackageReference Include="Grpc.HealthCheck" Version="2.65.0" />
<PackageReference Include="Grpc.Net.ClientFactory" Version="2.65.0" />
<PackageReference Include="SIL.Machine" Version="3.2.6" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
<PackageReference Include="SIL.Machine" Version="3.4.0" Condition="!Exists('..\..\..\..\..\machine\src\SIL.Machine\SIL.Machine.csproj')" />
<PackageReference Include="Microsoft.FeatureManagement.AspNetCore" Version="3.5.0" />
</ItemGroup>

Expand Down
51 changes: 41 additions & 10 deletions src/Serval/src/Serval.Translation/Services/EngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,12 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre
V1.MonolingualCorpus targetCorpus =
new() { Language = source.TargetLanguage, Files = { source.TargetFiles.Select(Map) } };

if (trainingCorpus != null)
if (trainingCorpus is null || (trainingCorpus.TextIds is null && trainingCorpus.ScriptureRange is null))
{
sourceCorpus.TrainOnAll = true;
targetCorpus.TrainOnAll = true;
}
else
{
if (trainingCorpus.TextIds is not null && trainingCorpus.ScriptureRange is not null)
{
Expand Down Expand Up @@ -636,7 +641,15 @@ private V1.ParallelCorpus Map(Corpus source, TrainingCorpus? trainingCorpus, Pre
targetCorpus.TrainOnChapters.Add(chapters);
}
}
if (pretranslateCorpus != null)
if (
pretranslateCorpus is null
|| (pretranslateCorpus.TextIds is null && pretranslateCorpus.ScriptureRange is null)
)
{
sourceCorpus.PretranslateAll = true;
targetCorpus.PretranslateAll = true;
}
else
{
if (pretranslateCorpus.TextIds is not null && pretranslateCorpus.ScriptureRange is not null)
{
Expand Down Expand Up @@ -767,14 +780,32 @@ pretranslateFilter is not null
Files = { source.Files.Select(Map) }
};

if (trainOnChapters is not null)
corpus.TrainOnChapters.Add(trainOnChapters);
if (trainingFilter?.TextIds is not null)
corpus.TrainOnTextIds.Add(trainingFilter.TextIds);
if (pretranslateChapters is not null)
corpus.PretranslateChapters.Add(pretranslateChapters);
if (pretranslateFilter?.TextIds is not null)
corpus.PretranslateTextIds.Add(pretranslateFilter.TextIds);
if (trainingFilter is null || (trainingFilter.TextIds is null && trainingFilter.ScriptureRange is null))
{
corpus.TrainOnAll = true;
}
else
{
if (trainOnChapters is not null)
corpus.TrainOnChapters.Add(trainOnChapters);
if (trainingFilter?.TextIds is not null)
corpus.TrainOnTextIds.Add(trainingFilter.TextIds);
}

if (
pretranslateFilter is null
|| (pretranslateFilter.TextIds is null && pretranslateFilter.ScriptureRange is null)
)
{
corpus.PretranslateAll = true;
}
else
{
if (pretranslateChapters is not null)
corpus.PretranslateChapters.Add(pretranslateChapters);
if (pretranslateFilter?.TextIds is not null)
corpus.PretranslateTextIds.Add(pretranslateFilter.TextIds);
}

return corpus;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1660,6 +1660,105 @@ public async Task StartBuildAsync_ParallelCorpus()
Assert.That(build, Is.Not.Null);
}

// Starts a build for a corpus added through AddCorpusAsync whose training and pretranslation
// configs carry corpus filters but no text ids or scripture range, and checks that the
// returned build reports neither restriction.
[Test]
public async Task StartBuildAsync_Corpus_NoFilter()
{
    TranslationEnginesClient client = _env.CreateTranslationEnginesClient();
    TranslationCorpus addedCorpus = await client.AddCorpusAsync(NMT_ENGINE1_ID, TestCorpusConfig);
    PretranslateCorpusConfig ptcc =
        new() { CorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }] };
    TrainingCorpusConfig tcc =
        new()
        {
            CorpusId = addedCorpus.Id,
            SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }],
            TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }]
        };
    TranslationBuildConfig tbc = new TranslationBuildConfig
    {
        Pretranslate = [ptcc],
        TrainOn = [tcc],
        Options = """
            {"max_steps":10,
            "use_key_terms":false,
            "some_double":10.5,
            "some_nested": {"more_nested": {"other_double":10.5}},
            "some_string":"string"}
            """
    };

    // Before any build has been started, asking for the current build is expected to fail.
    Assert.ThrowsAsync<ServalApiException>(async () => await client.GetCurrentBuildAsync(NMT_ENGINE1_ID));

    TranslationBuild build = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc);
    Assert.That(build, Is.Not.Null);

    // The training entry is echoed back with no text-id or scripture-range restriction.
    Assert.That(build.TrainOn, Is.Not.Null);
    Assert.That(build.TrainOn.Count, Is.EqualTo(1));
    Assert.That(build.TrainOn[0].TextIds, Is.Null);
    Assert.That(build.TrainOn[0].ScriptureRange, Is.Null);

    // The same holds for the pretranslation entry.
    Assert.That(build.Pretranslate, Is.Not.Null);
    Assert.That(build.Pretranslate.Count, Is.EqualTo(1));
    Assert.That(build.Pretranslate[0].TextIds, Is.Null);
    Assert.That(build.Pretranslate[0].ScriptureRange, Is.Null);

    // Once started, the build can be fetched as the engine's current build.
    build = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
    Assert.That(build, Is.Not.Null);
}

// Starts a build for a parallel corpus whose training and pretranslation configs carry
// corpus filters but no text ids or scripture range, and checks that the returned build
// reports neither restriction.
[Test]
public async Task StartBuildAsync_ParallelCorpus_NoFilter()
{
    TranslationEnginesClient client = _env.CreateTranslationEnginesClient();
    TranslationParallelCorpus addedCorpus = await client.AddParallelCorpusAsync(
        NMT_ENGINE1_ID,
        TestParallelCorpusConfig
    );
    PretranslateCorpusConfig ptcc =
        new() { ParallelCorpusId = addedCorpus.Id, SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }] };
    TrainingCorpusConfig tcc =
        new()
        {
            ParallelCorpusId = addedCorpus.Id,
            SourceFilters = [new() { CorpusId = SOURCE_CORPUS_ID }],
            TargetFilters = [new() { CorpusId = TARGET_CORPUS_ID }]
        };
    TranslationBuildConfig tbc = new TranslationBuildConfig
    {
        Pretranslate = [ptcc],
        TrainOn = [tcc],
        Options = """
            {"max_steps":10,
            "use_key_terms":false,
            "some_double":10.5,
            "some_nested": {"more_nested": {"other_double":10.5}},
            "some_string":"string"}
            """
    };

    // Before any build has been started, asking for the current build is expected to fail.
    Assert.ThrowsAsync<ServalApiException>(async () => await client.GetCurrentBuildAsync(NMT_ENGINE1_ID));

    TranslationBuild build = await client.StartBuildAsync(NMT_ENGINE1_ID, tbc);
    Assert.That(build, Is.Not.Null);

    // The training entry is echoed back with no text-id or scripture-range restriction.
    Assert.That(build.TrainOn, Is.Not.Null);
    Assert.That(build.TrainOn.Count, Is.EqualTo(1));
    Assert.That(build.TrainOn[0].TextIds, Is.Null);
    Assert.That(build.TrainOn[0].ScriptureRange, Is.Null);

    // The same holds for the pretranslation entry.
    Assert.That(build.Pretranslate, Is.Not.Null);
    Assert.That(build.Pretranslate.Count, Is.EqualTo(1));
    Assert.That(build.Pretranslate[0].TextIds, Is.Null);
    Assert.That(build.Pretranslate[0].ScriptureRange, Is.Null);

    // Once started, the build can be fetched as the engine's current build.
    build = await client.GetCurrentBuildAsync(NMT_ENGINE1_ID);
    Assert.That(build, Is.Not.Null);
}

[Test]
public async Task StartBuildAsync_ParallelCorpus_PretranslateParallelAndNormalCorpus()
{
Expand Down
Loading

0 comments on commit 7b1172a

Please sign in to comment.