Skip to content

Commit

Permalink
Working machine-side logic implementation; (one outstanding test that…
Browse files Browse the repository at this point in the history
… should be written 'TODO')

Note: With this new implementation, text marked for pretranslation will be pretranslated even if target text already exists for the same verses. I believe this is not an issue (and the previous implementation may have been a mistake), given the USFM source text preference options.
  • Loading branch information
Enkidu93 committed Oct 3, 2024
1 parent 29cb846 commit 7c13dc0
Show file tree
Hide file tree
Showing 25 changed files with 1,416 additions and 1,439 deletions.
21 changes: 11 additions & 10 deletions src/Echo/src/EchoTranslationEngine/TranslationEngineServiceV1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,21 +80,22 @@ await client.BuildStartedAsync(
client.InsertPretranslations(cancellationToken: cancellationToken)
)
{
foreach (Corpus corpus in request.Corpora)
foreach (ParallelCorpus corpus in request.Corpora)
{
if (!corpus.PretranslateAll && corpus.PretranslateTextIds.Count == 0)
continue;
var sourceFiles = corpus
.SourceFiles.Where(f =>
(corpus.PretranslateAll || corpus.PretranslateTextIds.Contains(f.TextId))
&& f.Format == FileFormat.Text
.SourceCorpora.SelectMany(sc =>
sc.Files.Where(f =>
(sc.PretranslateTextIds is null || sc.PretranslateTextIds.Contains(f.TextId))
&& f.Format == FileFormat.Text
)
)
.ToDictionary(f => f.TextId, f => f.Location);
var targetFiles = corpus
.TargetFiles.Where(f =>
(corpus.PretranslateAll || corpus.PretranslateTextIds.Contains(f.TextId))
&& f.Format == FileFormat.Text
.TargetCorpora.SelectMany(tc =>
tc.Files.Where(f =>
(tc.PretranslateTextIds is null || tc.PretranslateTextIds.Contains(f.TextId))
&& f.Format == FileFormat.Text
)
)
.ToDictionary(f => f.TextId, f => f.Location);
Expand Down
14 changes: 0 additions & 14 deletions src/Machine/src/Serval.Machine.Shared/Models/Corpus.cs

This file was deleted.

12 changes: 12 additions & 0 deletions src/Machine/src/Serval.Machine.Shared/Models/MonolingualCorpus.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
namespace Serval.Machine.Shared.Models;

/// <summary>
/// A corpus tied to a single <see cref="Language"/>: a list of files plus optional
/// selections of text IDs and chapters for training and for pretranslation.
/// </summary>
public record MonolingualCorpus
{
/// <summary>Identifier of this corpus. Must be set at initialization.</summary>
public required string Id { get; set; }
/// <summary>
/// Language of this corpus. Must be set at initialization.
/// NOTE(review): the expected tag format is not visible here; confirm against callers.
/// </summary>
public required string Language { get; set; }
/// <summary>Files that belong to this corpus. Must be set at initialization.</summary>
public required IReadOnlyList<CorpusFile> Files { get; set; }
/// <summary>
/// Optional set of text IDs selected for training.
/// NOTE(review): presumably <c>null</c> means "no restriction"; the null-vs-empty semantics
/// are defined by the consumers of this model, so confirm there.
/// </summary>
public HashSet<string>? TrainOnTextIds { get; set; }
/// <summary>
/// Optional map from a string key to a set of integers, selected for training.
/// NOTE(review): presumably keyed by text/book ID with chapter numbers as values; TODO confirm.
/// </summary>
public Dictionary<string, HashSet<int>>? TrainOnChapters { get; set; }
/// <summary>
/// Optional set of text IDs selected for pretranslation.
/// NOTE(review): presumably <c>null</c> means "no restriction"; the null-vs-empty semantics
/// are defined by the consumers of this model, so confirm there.
/// </summary>
public HashSet<string>? PretranslateTextIds { get; set; }
/// <summary>
/// Optional map from a string key to a set of integers, selected for pretranslation.
/// NOTE(review): presumably keyed by text/book ID with chapter numbers as values; TODO confirm.
/// </summary>
public Dictionary<string, HashSet<int>>? PretranslateChapters { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace Serval.Machine.Shared.Models;

/// <summary>
/// Groups a list of source-side and a list of target-side <see cref="MonolingualCorpus"/>
/// instances under a single identifier.
/// </summary>
public record ParallelCorpus
{
/// <summary>Identifier of this parallel corpus. Must be set at initialization.</summary>
public required string Id { get; set; }
/// <summary>Source-side corpora. Defaults to an empty list when not set.</summary>
public IReadOnlyList<MonolingualCorpus> SourceCorpora { get; set; } = new List<MonolingualCorpus>();
/// <summary>Target-side corpora. Defaults to an empty list when not set.</summary>
public IReadOnlyList<MonolingualCorpus> TargetCorpora { get; set; } = new List<MonolingualCorpus>();
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Task StartBuildAsync(
string engineId,
string buildId,
string? buildOptions,
IReadOnlyList<Corpus> corpora,
IReadOnlyList<ParallelCorpus> corpora,
CancellationToken cancellationToken = default
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public async Task StartBuildAsync(
string engineId,
string buildId,
string? buildOptions,
IReadOnlyList<Corpus> corpora,
IReadOnlyList<ParallelCorpus> corpora,
CancellationToken cancellationToken = default
)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public Job CreateJob(string engineId, string buildId, BuildStage stage, object?
return stage switch
{
BuildStage.Preprocess
=> CreateJob<NmtPreprocessBuildJob, IReadOnlyList<Corpus>>(
=> CreateJob<NmtPreprocessBuildJob, IReadOnlyList<ParallelCorpus>>(
engineId,
buildId,
"nmt",
Expand Down
Loading

0 comments on commit 7c13dc0

Please sign in to comment.