Skip to content

Commit

Permalink
Echo engine endpoint coverage (#80)
Browse files Browse the repository at this point in the history
* Fixed echo coverage of pretrans --ECL

* Small fix --ECL

* Close #66 --ECL

* Echo engine documentation and fixes #72 --ECL

* Fixes #72

Forgot to circumvent matching target/source language error for Echo for corpora --ECL

* Fixes as per PR review --ECL

* Requested changes --ECL

* The name of the client helper function changed on master.

---------

Co-authored-by: John Lambert <john_lambert@sil.org>
  • Loading branch information
Enkidu93 and johnml1135 authored Aug 22, 2023
1 parent 54f418a commit 74305c3
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 140 deletions.
8 changes: 4 additions & 4 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ services:
- ASPNETCORE_TranslationEngines__0=SmtTransfer
- ASPNETCORE_TranslationEngines__0=Nmt
- ClearMLNmtEngine__ApiServer=https://api.sil.hosted.allegro.ai
- ClearMLNmtEngine__Queue=lambert_24gb
- ClearMLNmtEngine__DockerImage=mpy.local
- ClearMLNmtEngine__Queue=production
- ClearMLNmtEngine__DockerImage=ghcr.io/sillsdev/machine.py:0.9.3.6
- ClearMLNmtEngine__MaxSteps=10
- "ClearMLNmtEngine__AccessKey=${ClearML_AccessKey:?access key needed}"
- "ClearMLNmtEngine__SecretKey=${ClearML_SecretKey:?secret key needed}"
Expand Down Expand Up @@ -136,8 +136,8 @@ services:
- ASPNETCORE_Kestrel__EndpointDefaults__Protocols=Http2
- ASPNETCORE_TranslationEngines__0=SmtTransfer
- ClearMLNmtEngine__ApiServer=https://api.sil.hosted.allegro.ai
- ClearMLNmtEngine__Queue=lambert_24gb
- ClearMLNmtEngine__DockerImage=mpy.local
- ClearMLNmtEngine__Queue=production
- ClearMLNmtEngine__DockerImage=ghcr.io/sillsdev/machine.py:0.9.3.6
- ClearMLNmtEngine__MaxSteps=10
- "ClearMLNmtEngine__AccessKey=${ClearML_AccessKey:?access key needed}"
- "ClearMLNmtEngine__SecretKey=${ClearML_SecretKey:?secret key needed}"
Expand Down
83 changes: 66 additions & 17 deletions samples/EchoTranslationEngine/TranslationEngineServiceV1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ public TranslationEngineServiceV1(BackgroundTaskQueue taskQueue)

/// <summary>
/// Handles an engine creation request by validating its language pair.
/// </summary>
/// <param name="request">The creation request; its source and target language codes are compared.</param>
/// <param name="context">The gRPC call context (not used by this method).</param>
/// <returns>A completed task carrying <c>Empty</c> when both language codes are equal.</returns>
/// <exception cref="RpcException">
/// Thrown with <see cref="StatusCode.InvalidArgument"/> when the source and target languages differ.
/// </exception>
public override Task<Empty> Create(CreateRequest request, ServerCallContext context)
{
    bool languagesMatch = request.SourceLanguage == request.TargetLanguage;
    if (languagesMatch)
    {
        return Task.FromResult(Empty);
    }

    // Mismatched language codes are rejected as a client error.
    throw new RpcException(
        new Status(StatusCode.InvalidArgument, "Source and target languages must be the same")
    );
}

Expand Down Expand Up @@ -106,8 +111,7 @@ await client.BuildStartedAsync(
if (targetFiles.TryGetValue(sourceFile.Key, out string? targetPath))
{
string[] targetLines = await File.ReadAllLinesAsync(targetPath, cancellationToken);
bool isTabSeparated =
(sourceLines.Length > 0) && (sourceLines[0].Split('\t').Length > 1);
bool isTabSeparated = (sourceLines.Length > 0) && sourceLines[0].Contains('/');
if (!isTabSeparated)
{
int lineNum = 1;
Expand All @@ -117,7 +121,7 @@ await client.BuildStartedAsync(
.Zip(targetLines.Select(l => l.Trim()))
)
{
if (sourceLine.Length > 0)
if (sourceLine.Length > 0 && targetLine.Length == 0)
{
await call.RequestStream.WriteAsync(
new InsertPretranslationRequest
Expand All @@ -126,7 +130,7 @@ await call.RequestStream.WriteAsync(
CorpusId = corpus.Id,
TextId = sourceFile.Key,
Refs = { $"{sourceFile.Key}:{lineNum}" },
Translation = targetLine.Length > 0 ? targetLine : sourceLine
Translation = sourceLine
},
cancellationToken
);
Expand All @@ -150,24 +154,26 @@ await call.RequestStream.WriteAsync(
sourceLinesDict.TryGetValue(targetLineKVPair.Key, out sourceLine);
sourceLine ??= string.Empty;
string? targetLine = targetLineKVPair.Value;
await call.RequestStream.WriteAsync(
new InsertPretranslationRequest
{
EngineId = request.EngineId,
CorpusId = corpus.Id,
TextId = sourceFile.Key,
Refs = { $"{sourceFile.Key}:{targetLineKVPair.Key}" },
Translation = targetLine.Length > 0 ? targetLine : sourceLine
},
cancellationToken
);
if (sourceLine.Length > 0 && targetLine.Length == 0)
{
await call.RequestStream.WriteAsync(
new InsertPretranslationRequest
{
EngineId = request.EngineId,
CorpusId = corpus.Id,
TextId = sourceFile.Key,
Refs = { $"{sourceFile.Key}:{targetLineKVPair.Key}" },
Translation = sourceLine
},
cancellationToken
);
}
}
}
}
else
{
bool isTabSeparated =
(sourceLines.Length > 0) && (sourceLines[0].Split('\t').Length > 1);
bool isTabSeparated = (sourceLines.Length > 0) && sourceLines[0].Contains('/');
if (!isTabSeparated)
{
int lineNum = 1;
Expand Down Expand Up @@ -244,4 +250,47 @@ await client.BuildFaultedAsync(

return Empty;
}

/// <summary>
/// Accepts a segment-pair training request without doing any work.
/// </summary>
/// <param name="request">The training request; its contents are ignored.</param>
/// <param name="_">The gRPC call context (unused).</param>
/// <returns>A completed task carrying <c>Empty</c>.</returns>
public override Task<Empty> TrainSegmentPair(TrainSegmentPairRequest request, ServerCallContext _) =>
    Task.FromResult(Empty);

/// <summary>
/// Builds a linear word graph that echoes the tokens of the requested segment.
/// </summary>
/// <remarks>
/// The segment is split on whitespace. One arc is emitted per token, linking state <c>i</c> to
/// state <c>i + 1</c> and carrying that token as its only target token, so the declared final
/// state (<c>tokens.Length</c>) is reachable from state 0.
/// </remarks>
/// <param name="request">The request whose <c>Segment</c> is tokenized and echoed back.</param>
/// <param name="_">The gRPC call context (unused).</param>
/// <returns>A completed task carrying the echo word graph.</returns>
public override Task<GetWordGraphResponse> GetWordGraph(GetWordGraphRequest request, ServerCallContext _)
{
    string[] tokens = request.Segment.Split();
    return Task.FromResult(
        new GetWordGraphResponse
        {
            WordGraph = new WordGraph
            {
                InitialStateScore = 0.0,
                SourceTokens = { tokens },
                Arcs =
                {
                    // Enumerable.Range takes (start, count), not (start, end): the count must be
                    // tokens.Length so the last token also gets an arc into the final state.
                    Enumerable
                        .Range(0, tokens.Length)
                        .Select(
                            index =>
                                new WordGraphArc
                                {
                                    PrevState = index,
                                    NextState = index + 1,
                                    Score = 1.0,
                                    TargetTokens = { tokens[index] },
                                    Confidences = { 1.0 },
                                    SourceSegmentStart = index,
                                    SourceSegmentEnd = index + 1,
                                    // Each arc has a single source/target token pair, aligned at 0-0.
                                    Alignment =
                                    {
                                        new AlignedWordPair { SourceIndex = 0, TargetIndex = 0 }
                                    }
                                }
                        )
                },
                FinalStates = { tokens.Length }
            }
        }
    );
}
}
38 changes: 22 additions & 16 deletions src/Serval.Client/Client.g.cs
Original file line number Diff line number Diff line change
Expand Up @@ -794,18 +794,21 @@ public partial interface ITranslationEnginesClient
/// <br/> * Recommendation: Create a multi-part name to distinguish between projects, uses, etc.
/// <br/> * The name does not have to be unique, as the engine is uniquely identified by the auto-generated id
/// <br/>* **sourceLanguage**: The source language code
/// <br/> * Note that for NMT, if the source or target language code matches an [NLLB-200 code](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200), it will map directly and use the language as-is.
/// <br/> * Note that for Nmt, if the source or target language code matches an [NLLB-200 code](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200), it will map directly and use the language as-is.
/// <br/>* **targetLanguage**: The target language code
/// <br/>* **type**: Either **SmtTransfer** or **Nmt**
/// <br/>* **type**: **SmtTransfer** or **Nmt** or **Echo**
/// <br/>### SmtTransfer
/// <br/>The Statistical Machine Translation Transfer Learning engine is primarily used for translation suggestions.
/// <br/>Typical endpoints: translate, get-word-graph, train-segment
/// <br/>### Nmt
/// <br/>The Neural Machine Translation engine is primarily used for pretranslations. It is
/// <br/>fine tuned from the NLLB-200 from Meta and inherits thw 200 language codes.
/// <br/>fine tuned from the NLLB-200 from Meta and inherits the 200 language codes.
/// <br/>Typical endpoints: pretranslate
/// <br/>### Echo
/// <br/>Has coverage of creation, building, and translation endpoints
/// <br/>The Echo engine has full coverage of all Nmt and SmtTransfer endpoints. Endpoints like create and build
/// <br/>return empty responses. Endpoints like translate and get-word-graph echo the sent content back to the user
/// <br/>in a format that mocks Nmt or Smt. For example, translating a segment "test" with the Echo engine would
/// <br/>yield a translation response with translation "test". This engine is useful for debugging and testing purposes.
/// <br/>## Sample request:
/// <br/>
/// <br/> {
Expand Down Expand Up @@ -889,14 +892,14 @@ public partial interface ITranslationEnginesClient
/// <remarks>
/// ## Parameters
/// <br/>* **name**: A name to help identify and distinguish the corpus from other corpora
/// <br/> * The name does not have to be unique, as the corpus is uniquely identified by the auto-generated id
/// <br/> * The name does not have to be unique since the corpus is uniquely identified by an auto-generated id
/// <br/>* **sourceLanguage**: The source language code
/// <br/> * Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning.
/// <br/>* **targetLanguage**: The target language code
/// <br/>* **SourceFiles**: The source files associated with the corpus
/// <br/> * **FileId**: The unique id referencing the uploaded file
/// <br/> * **TextId**: The client defined name to associate source and target files.
/// <br/> * If the TextId in the SourceFiles and TargetFiles matches, they will be used to train the engine.
/// <br/> * **TextId**: The client-defined name to associate source and target files.
/// <br/> * If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine.
/// <br/> * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines
/// <br/> of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated.
/// <br/> * A TextId should only be used at most once in SourceFiles and in TargetFiles.
Expand All @@ -923,7 +926,7 @@ public partial interface ITranslationEnginesClient
/// Update a corpus with a new set of files
/// </summary>
/// <remarks>
/// See posting a new corpus for details of use. Will completely replace corpora files associations.
/// See posting a new corpus for details of use. Will completely replace corpus' file associations.
/// <br/>Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete.
/// </remarks>
/// <param name="id">The translation engine id</param>
Expand Down Expand Up @@ -1190,18 +1193,21 @@ public string BaseUrl
/// <br/> * Recommendation: Create a multi-part name to distinguish between projects, uses, etc.
/// <br/> * The name does not have to be unique, as the engine is uniquely identified by the auto-generated id
/// <br/>* **sourceLanguage**: The source language code
/// <br/> * Note that for NMT, if the source or target language code matches an [NLLB-200 code](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200), it will map directly and use the language as-is.
/// <br/> * Note that for Nmt, if the source or target language code matches an [NLLB-200 code](https://github.com/facebookresearch/flores/tree/main/flores200#languages-in-flores-200), it will map directly and use the language as-is.
/// <br/>* **targetLanguage**: The target language code
/// <br/>* **type**: Either **SmtTransfer** or **Nmt**
/// <br/>* **type**: **SmtTransfer** or **Nmt** or **Echo**
/// <br/>### SmtTransfer
/// <br/>The Statistical Machine Translation Transfer Learning engine is primarily used for translation suggestions.
/// <br/>Typical endpoints: translate, get-word-graph, train-segment
/// <br/>### Nmt
/// <br/>The Neural Machine Translation engine is primarily used for pretranslations. It is
/// <br/>fine tuned from the NLLB-200 from Meta and inherits thw 200 language codes.
/// <br/>fine tuned from the NLLB-200 from Meta and inherits the 200 language codes.
/// <br/>Typical endpoints: pretranslate
/// <br/>### Echo
/// <br/>Has coverage of creation, building, and translation endpoints
/// <br/>The Echo engine has full coverage of all Nmt and SmtTransfer endpoints. Endpoints like create and build
/// <br/>return empty responses. Endpoints like translate and get-word-graph echo the sent content back to the user
/// <br/>in a format that mocks Nmt or Smt. For example, translating a segment "test" with the Echo engine would
/// <br/>yield a translation response with translation "test". This engine is useful for debugging and testing purposes.
/// <br/>## Sample request:
/// <br/>
/// <br/> {
Expand Down Expand Up @@ -2016,14 +2022,14 @@ public string BaseUrl
/// <remarks>
/// ## Parameters
/// <br/>* **name**: A name to help identify and distinguish the corpus from other corpora
/// <br/> * The name does not have to be unique, as the corpus is uniquely identified by the auto-generated id
/// <br/> * The name does not have to be unique since the corpus is uniquely identified by an auto-generated id
/// <br/>* **sourceLanguage**: The source language code
/// <br/> * Normally, this is the same as the engine sourceLanguage. This may change for future engines as a means of transfer learning.
/// <br/>* **targetLanguage**: The target language code
/// <br/>* **SourceFiles**: The source files associated with the corpus
/// <br/> * **FileId**: The unique id referencing the uploaded file
/// <br/> * **TextId**: The client defined name to associate source and target files.
/// <br/> * If the TextId in the SourceFiles and TargetFiles matches, they will be used to train the engine.
/// <br/> * **TextId**: The client-defined name to associate source and target files.
/// <br/> * If the TextIds in the SourceFiles and TargetFiles match, they will be used to train the engine.
/// <br/> * If selected for pretranslation when building, all SourceFiles that have no TargetFile, or lines
/// <br/> of text in a SourceFile that have missing or blank lines in the TargetFile will be pretranslated.
/// <br/> * A TextId should only be used at most once in SourceFiles and in TargetFiles.
Expand Down Expand Up @@ -2249,7 +2255,7 @@ public string BaseUrl
/// Update a corpus with a new set of files
/// </summary>
/// <remarks>
/// See posting a new corpus for details of use. Will completely replace corpora files associations.
/// See posting a new corpus for details of use. Will completely replace corpus' file associations.
/// <br/>Will not affect jobs already queued or running. Will not affect existing pretranslations until new build is complete.
/// </remarks>
/// <param name="id">The translation engine id</param>
Expand Down
Loading

0 comments on commit 74305c3

Please sign in to comment.