-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PretranslationService.cs
161 lines (150 loc) · 6.15 KB
/
PretranslationService.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
using SIL.Machine.Corpora;
namespace Serval.Translation.Services;
/// <summary>
/// Queries stored <see cref="Pretranslation"/> entities and renders them as USFM for corpora whose
/// first source and target files are Paratext projects.
/// </summary>
public class PretranslationService(
    IRepository<Pretranslation> pretranslations,
    IRepository<Engine> engines,
    IScriptureDataFileService scriptureDataFileService
) : EntityServiceBase<Pretranslation>(pretranslations), IPretranslationService
{
    private readonly IRepository<Engine> _engines = engines;
    private readonly IScriptureDataFileService _scriptureDataFileService = scriptureDataFileService;

    /// <summary>
    /// Gets the pretranslations that match the given engine, model revision and corpus, optionally
    /// restricted to a single text.
    /// </summary>
    /// <param name="engineId">Value matched against <c>EngineRef</c>.</param>
    /// <param name="modelRevision">Value matched against <c>ModelRevision</c>.</param>
    /// <param name="corpusId">Value matched against <c>CorpusRef</c>.</param>
    /// <param name="textId">Value matched against <c>TextId</c>; <c>null</c> disables this filter.</param>
    /// <param name="cancellationToken">Token forwarded to the repository query.</param>
    /// <returns>The matching pretranslations.</returns>
    public async Task<IEnumerable<Pretranslation>> GetAllAsync(
        string engineId,
        int modelRevision,
        string corpusId,
        string? textId = null,
        CancellationToken cancellationToken = default
    )
    {
        return await Entities.GetAllAsync(
            pt =>
                pt.EngineRef == engineId
                && pt.ModelRevision == modelRevision
                && pt.CorpusRef == corpusId
                && (textId == null || pt.TextId == textId),
            cancellationToken
        );
    }

    /// <summary>
    /// Builds the USFM of one book by merging the stored pretranslations into the target project's book,
    /// or, when the target project does not contain the book, into a copy of the source project's book.
    /// </summary>
    /// <param name="engineId">Id of the engine that owns the corpus.</param>
    /// <param name="modelRevision">Model revision whose pretranslations are used.</param>
    /// <param name="corpusId">Id of the corpus inside the engine.</param>
    /// <param name="textId">Id of the book to read from the Paratext projects.</param>
    /// <param name="textOrigin">Selects how existing text and pretranslated text are combined.</param>
    /// <param name="cancellationToken">Token forwarded to the repository queries.</param>
    /// <returns>
    /// The resulting USFM, or an empty string when neither project contains the book (or when
    /// <paramref name="textOrigin"/> is not one of the handled values).
    /// </returns>
    /// <exception cref="EntityNotFoundException">The engine or the corpus could not be found.</exception>
    /// <exception cref="InvalidOperationException">
    /// The corpus has no source or target file, or its first source/target file is not a Paratext project.
    /// </exception>
    public async Task<string> GetUsfmAsync(
        string engineId,
        int modelRevision,
        string corpusId,
        string textId,
        PretranslationUSFMTextOrigin textOrigin = PretranslationUSFMTextOrigin.PreferExisting,
        CancellationToken cancellationToken = default
    )
    {
        Engine? engine = await _engines.GetAsync(engineId, cancellationToken);
        Corpus? corpus = engine?.Corpora.SingleOrDefault(c => c.Id == corpusId);
        if (corpus is null)
            throw new EntityNotFoundException($"Could not find the Corpus '{corpusId}' in Engine '{engineId}'.");

        // Indexing an empty file list would surface as an opaque ArgumentOutOfRangeException; report the
        // unusable corpus with the same exception type as the format check below.
        if (corpus.SourceFiles is [] || corpus.TargetFiles is [])
        {
            throw new InvalidOperationException(
                $"The Corpus '{corpusId}' in Engine '{engineId}' must have at least one source file and one target file to generate USFM."
            );
        }

        CorpusFile sourceFile = corpus.SourceFiles[0];
        CorpusFile targetFile = corpus.TargetFiles[0];
        if (sourceFile.Format is not FileFormat.Paratext || targetFile.Format is not FileFormat.Paratext)
            throw new InvalidOperationException("USFM format is not valid for non-Scripture corpora.");

        ParatextProjectSettings sourceSettings = _scriptureDataFileService.GetParatextProjectSettings(
            sourceFile.Filename
        );
        ParatextProjectSettings targetSettings = _scriptureDataFileService.GetParatextProjectSettings(
            targetFile.Filename
        );

        // References are parsed with the target project's versification and the rows are ordered by
        // their first reference.
        IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows = (
            await GetAllAsync(engineId, modelRevision, corpusId, textId, cancellationToken)
        )
            .Select(p =>
                (
                    (IReadOnlyList<ScriptureRef>)
                        p.Refs.Select(r => ScriptureRef.Parse(r, targetSettings.Versification)).ToList(),
                    p.Translation
                )
            )
            // A row without any reference has no sort key (Item1[0] would throw) and nothing to anchor
            // it to in the book, so it is skipped instead of failing the whole export.
            .Where(p => p.Item1.Count > 0)
            .OrderBy(p => p.Item1[0])
            .ToList();

        // Update the target book if it exists
        string? usfm = await _scriptureDataFileService.ReadParatextProjectBookAsync(targetFile.Filename, textId);
        if (usfm is not null)
        {
            switch (textOrigin)
            {
                case PretranslationUSFMTextOrigin.PreferExisting:
                    return UpdateUsfm(
                        targetSettings,
                        usfm,
                        rows,
                        strictComparison: false,
                        preferExistingText: true
                    );
                case PretranslationUSFMTextOrigin.PreferPretranslated:
                    return UpdateUsfm(
                        targetSettings,
                        usfm,
                        rows,
                        strictComparison: false,
                        preferExistingText: false
                    );
                case PretranslationUSFMTextOrigin.OnlyExisting:
                    // The existing book is returned untouched.
                    return usfm;
                case PretranslationUSFMTextOrigin.OnlyPretranslated:
                    return UpdateUsfm(
                        targetSettings,
                        usfm,
                        rows,
                        strictComparison: false,
                        stripAllText: true,
                        preferExistingText: false
                    );
            }
        }

        // Copy and update the source book if it exists
        usfm = await _scriptureDataFileService.ReadParatextProjectBookAsync(sourceFile.Filename, textId);
        if (usfm is not null)
        {
            switch (textOrigin)
            {
                case PretranslationUSFMTextOrigin.PreferExisting:
                case PretranslationUSFMTextOrigin.PreferPretranslated:
                case PretranslationUSFMTextOrigin.OnlyPretranslated:
                    // The source book is parsed with the source settings, its text is stripped and the
                    // target project's full name is supplied for the rewritten book.
                    return UpdateUsfm(
                        sourceSettings,
                        usfm,
                        rows,
                        targetSettings.FullName,
                        stripAllText: true,
                        strictComparison: true,
                        preferExistingText: true
                    );
                case PretranslationUSFMTextOrigin.OnlyExisting:
                    return UpdateUsfm(
                        sourceSettings,
                        usfm,
                        [], // don't pass the pretranslations, we only want the existing text.
                        targetSettings.FullName,
                        stripAllText: true,
                        strictComparison: true,
                        preferExistingText: true
                    );
            }
        }

        // Neither project contains the book (or the text origin is not a handled value).
        return "";
    }

    /// <summary>
    /// Parses <paramref name="usfm"/> with a <see cref="UsfmTextUpdater"/> configured from the arguments
    /// and returns the USFM the updater produces.
    /// </summary>
    /// <param name="settings">Supplies the stylesheet and versification used for parsing and output.</param>
    /// <param name="usfm">The USFM text of the book to update.</param>
    /// <param name="pretranslations">Reference/translation rows handed to the updater.</param>
    /// <param name="fullName">
    /// When not <c>null</c>, passed to the updater as <c>"- {fullName}"</c>
    /// (NOTE(review): presumably the text written on the book's id line - confirm against UsfmTextUpdater).
    /// </param>
    /// <param name="stripAllText">Forwarded to the updater.</param>
    /// <param name="strictComparison">Forwarded to the updater.</param>
    /// <param name="preferExistingText">Forwarded to the updater.</param>
    /// <returns>The USFM returned by the updater.</returns>
    private static string UpdateUsfm(
        ParatextProjectSettings settings,
        string usfm,
        IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> pretranslations,
        string? fullName = null,
        bool stripAllText = false,
        bool strictComparison = false,
        bool preferExistingText = true
    )
    {
        var updater = new UsfmTextUpdater(
            pretranslations,
            fullName is null ? null : $"- {fullName}",
            stripAllText,
            strictComparison: strictComparison,
            preferExistingText: preferExistingText
        );
        UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification);
        return updater.GetUsfm(settings.Stylesheet);
    }
}