Skip to content

Commit

Permalink
Move corpora to data files; implement Serval-side mixing logic (draft)
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Sep 20, 2024
1 parent 7c448c4 commit bc055a1
Show file tree
Hide file tree
Showing 43 changed files with 1,773 additions and 450 deletions.
15 changes: 0 additions & 15 deletions Serval.sln
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{C3A14577-A65
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SIL.ServiceToolkit", "src\ServiceToolkit\src\SIL.ServiceToolkit\SIL.ServiceToolkit.csproj", "{0E40F959-C641-40A2-9750-B17A4F9F9E55}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{341EEA9B-9E9E-4316-BE77-15769E03D646}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Serval", "Serval", "{858D3359-F7EA-4088-BDB1-5FDD22F44331}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{70703B03-4E62-4954-93E3-3845B9C678E7}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Serval.Corpora", "src\Serval\src\Serval.Corpora\Serval.Corpora.csproj", "{D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -188,10 +180,6 @@ Global
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0E40F959-C641-40A2-9750-B17A4F9F9E55}.Release|Any CPU.Build.0 = Release|Any CPU
{D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -227,9 +215,6 @@ Global
{10657805-48F1-4205-B8F5-79447F6EF620} = {25CDB05B-4E24-4A6E-933E-1E0BEC97D74D}
{C3A14577-A654-4604-818C-4E683DD45A51} = {EA69B41C-49EF-4017-A687-44B9DF37FF98}
{0E40F959-C641-40A2-9750-B17A4F9F9E55} = {C3A14577-A654-4604-818C-4E683DD45A51}
{858D3359-F7EA-4088-BDB1-5FDD22F44331} = {341EEA9B-9E9E-4316-BE77-15769E03D646}
{70703B03-4E62-4954-93E3-3845B9C678E7} = {858D3359-F7EA-4088-BDB1-5FDD22F44331}
{D43D405A-B9C5-4D5B-85C6-65A2BD15ADAD} = {70703B03-4E62-4954-93E3-3845B9C678E7}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {9F18C25E-E140-43C3-B177-D562E1628370}
Expand Down
1 change: 0 additions & 1 deletion src/Serval/src/Serval.ApiServer/Serval.ApiServer.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@

<ItemGroup>
<ProjectReference Include="..\Serval.Assessment\Serval.Assessment.csproj" />
<ProjectReference Include="..\Serval.Corpora\Serval.Corpora.csproj" />
<ProjectReference Include="..\Serval.DataFiles\Serval.DataFiles.csproj" />
<ProjectReference Include="..\Serval.Translation\Serval.Translation.csproj" />
<ProjectReference Include="..\Serval.Webhooks\Serval.Webhooks.csproj" />
Expand Down
1,044 changes: 912 additions & 132 deletions src/Serval/src/Serval.Client/Client.g.cs

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

9 changes: 0 additions & 9 deletions src/Serval/src/Serval.Corpora/Models/CorpusFile.cs

This file was deleted.

25 changes: 0 additions & 25 deletions src/Serval/src/Serval.Corpora/Serval.Corpora.csproj

This file was deleted.

15 changes: 0 additions & 15 deletions src/Serval/src/Serval.Corpora/Usings.cs

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,16 @@ public static IMongoDataAccessConfigurator AddDataFilesRepositories(this IMongoD
);
return configurator;
}

/// <summary>
/// Registers a <see cref="Corpus"/> repository under the name "corpora.corpus" and, in its init callback,
/// creates or updates an ascending index on <see cref="Corpus.Owner"/>.
/// </summary>
/// <param name="configurator">The Mongo data access configurator the repository is added to.</param>
/// <returns>The same <paramref name="configurator"/> instance that was passed in.</returns>
public static IMongoDataAccessConfigurator AddCorporaRepository(this IMongoDataAccessConfigurator configurator)
{
    configurator.AddRepository<Corpus>(
        "corpora.corpus",
        init: collection =>
        {
            // Ascending index key on the Owner property.
            var ownerKey = Builders<Corpus>.IndexKeys.Ascending(corpus => corpus.Owner);
            return collection.Indexes.CreateOrUpdateAsync(new CreateIndexModel<Corpus>(ownerKey));
        }
    );
    return configurator;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,10 @@ public static IServalBuilder AddDataFiles(this IServalBuilder builder)
builder.Services.AddHostedService<DeletedFileCleaner>();
return builder;
}

/// <summary>
/// Adds <see cref="CorpusService"/> as the scoped implementation of <see cref="ICorpusService"/>
/// to the builder's service collection.
/// </summary>
/// <param name="builder">The Serval builder whose services are extended.</param>
/// <returns>The same <paramref name="builder"/> instance that was passed in.</returns>
public static IServalBuilder AddCorpora(this IServalBuilder builder)
{
    var services = builder.Services;
    services.AddScoped<ICorpusService, CorpusService>();
    return builder;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ namespace Serval.Corpora.Contracts;

public record CorpusFileDto
{
public required ResourceLinkDto File { get; init; }
public required DataFileDto File { get; init; }
public string? TextId { get; init; }
}
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
using MassTransit;

namespace Serval.Corpora.Controllers;

[ApiVersion("1.0")]
[Route("api/v{version:apiVersion}/corpora")]
[OpenApiTag("Corpora")]
public class CorporaController(IAuthorizationService authService, ICorpusService corpusService, IUrlService urlService)
: ServalControllerBase(authService)
public class CorporaController(
IAuthorizationService authService,
ICorpusService corpusService,
IUrlService urlService,
IDataFileService dataFileService
) : ServalControllerBase(authService)
{
private readonly ICorpusService _corpusService = corpusService;
private readonly IUrlService _urlService = urlService;

private readonly IDataFileService _dataFileService = dataFileService;

/// <summary>
/// Get all corpora
/// </summary>
/// <response code="200">A list of all corpora owned by the client</response>
/// <response code="401">The client is not authenticated</response>
/// <response code="403">The authenticated client cannot perform the operation</response>
/// <response code="503">A necessary service is currently unavailable. Check `/health` for more details. </response>
[Authorize(Scopes.ReadCorpora)]
[Authorize(Scopes.ReadFiles)]
[HttpGet]
[ProducesResponseType(StatusCodes.Status200OK)]
[ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)]
Expand All @@ -39,7 +43,7 @@ public async Task<IEnumerable<CorpusDto>> GetAllAsync(CancellationToken cancella
/// <response code="403">The authenticated client cannot perform the operation or does not own the corpus</response>
/// <response code="404">The corpus does not exist</response>
/// <response code="503">A necessary service is currently unavailable. Check `/health` for more details. </response>
[Authorize(Scopes.ReadCorpora)]
[Authorize(Scopes.ReadFiles)]
[HttpGet("{id}", Name = Endpoints.GetCorpus)]
[ProducesResponseType(StatusCodes.Status200OK)]
[ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)]
Expand All @@ -63,7 +67,7 @@ public async Task<ActionResult<CorpusDto>> GetAsync([NotNull] string id, Cancell
/// <response code="401">The client is not authenticated</response>
/// <response code="403">The authenticated client cannot perform the operation</response>
/// <response code="503">A necessary service is currently unavailable. Check `/health` for more details. </response>
[Authorize(Scopes.CreateCorpora)]
[Authorize(Scopes.CreateFiles)]
[HttpPost]
[ProducesResponseType(StatusCodes.Status201Created)]
[ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)]
Expand All @@ -77,7 +81,7 @@ public async Task<ActionResult<CorpusDto>> CreateAsync(
CancellationToken cancellationToken
)
{
Corpus corpus = await MapAsync(getDataFileClient, corpusConfig, idGenerator.GenerateId(), cancellationToken);
Corpus corpus = await MapAsync(corpusConfig, idGenerator.GenerateId(), cancellationToken);
await _corpusService.CreateAsync(corpus, cancellationToken);
CorpusDto dto = Map(corpus);
return Created(dto.Url, dto);
Expand All @@ -87,15 +91,15 @@ CancellationToken cancellationToken
/// Update an existing corpus
/// </summary>
/// <param name="id">The unique identifier for the corpus</param>
/// <param name="files">The new corpus files</param>
/// <param name="files">Tuples of the ids of the new corpus files and the associated text ids</param>
/// <param name="cancellationToken"></param>
/// <response code="200">The corpus was updated successfully</response>
/// <response code="400">Bad request</response>
/// <response code="401">The client is not authenticated</response>
/// <response code="403">The authenticated client cannot perform the operation or does not own the corpus</response>
/// <response code="404">The corpus does not exist and therefore cannot be updated</response>
/// <response code="503">A necessary service is currently unavailable. Check `/health` for more details. </response>
[Authorize(Scopes.UpdateCorpora)]
[Authorize(Scopes.UpdateFiles)]
[HttpPatch("{id}")]
[ProducesResponseType(StatusCodes.Status200OK)]
[ProducesResponseType(typeof(void), StatusCodes.Status400BadRequest)]
Expand All @@ -106,15 +110,14 @@ CancellationToken cancellationToken
public async Task<ActionResult<CorpusDto>> UpdateAsync(
[NotNull] string id,
[NotNull] IReadOnlyList<CorpusFileConfigDto> files,
[FromServices] IRequestClient<GetDataFile> getDataFileClient,
CancellationToken cancellationToken
)
{
await AuthorizeAsync(id, cancellationToken);

Corpus corpus = await _corpusService.UpdateAsync(
id,
await MapAsync(getDataFileClient, files, cancellationToken),
await MapAsync(files, cancellationToken),
cancellationToken
);

Expand All @@ -132,7 +135,7 @@ await MapAsync(getDataFileClient, files, cancellationToken),
/// <response code="403">The authenticated client cannot perform the operation or does not own the corpus</response>
/// <response code="404">The corpus does not exist and therefore cannot be deleted</response>
/// <response code="503">A necessary service is currently unavailable. Check `/health` for more details. </response>
[Authorize(Scopes.DeleteCorpora)]
[Authorize(Scopes.DeleteFiles)]
[HttpDelete("{id}")]
[ProducesResponseType(typeof(void), StatusCodes.Status200OK)]
[ProducesResponseType(typeof(void), StatusCodes.Status401Unauthorized)]
Expand All @@ -152,53 +155,31 @@ private async Task AuthorizeAsync(string id, CancellationToken cancellationToken
await AuthorizeAsync(corpus);
}

private async Task<Corpus> MapAsync(
IRequestClient<GetDataFile> getDataFileClient,
CorpusConfigDto corpusConfig,
string id,
CancellationToken cancellationToken
)
private async Task<Corpus> MapAsync(CorpusConfigDto corpusConfig, string id, CancellationToken cancellationToken)
{
return new Corpus
{
Id = id,
Owner = Owner,
Language = corpusConfig.Language,
Files = await MapAsync(getDataFileClient, corpusConfig.Files, cancellationToken)
Files = await MapAsync(corpusConfig.Files, cancellationToken)
};
}

private async Task<IReadOnlyList<CorpusFile>> MapAsync(
IRequestClient<GetDataFile> getDataFileClient,
IEnumerable<CorpusFileConfigDto> corpusFileConfigs,
IReadOnlyList<CorpusFileConfigDto> files,
CancellationToken cancellationToken
)
{
var files = new List<CorpusFile>();
foreach (CorpusFileConfigDto corpusFileConfig in corpusFileConfigs)
var dataFiles = new List<CorpusFile>();
foreach (CorpusFileConfigDto file in files)
{
Response<DataFileResult, DataFileNotFound> response = await getDataFileClient.GetResponse<
DataFileResult,
DataFileNotFound
>(new GetDataFile { DataFileId = corpusFileConfig.FileId, Owner = Owner }, cancellationToken);
if (response.Is(out Response<DataFileResult>? result))
{
files.Add(
new CorpusFile
{
Id = corpusFileConfig.FileId,
Filename = result.Message.Filename,
TextId = corpusFileConfig.TextId ?? result.Message.Name,
Format = result.Message.Format
}
);
}
else if (response.Is(out Response<DataFileNotFound>? _))
{
throw new InvalidOperationException($"The data file {corpusFileConfig.FileId} cannot be found.");
}
DataFile? dataFile = await _dataFileService.GetAsync(file.FileId, cancellationToken);
if (dataFile == null)
throw new InvalidOperationException($"DataFile with id {file.FileId} does not exist.");
dataFiles.Add(new CorpusFile { File = dataFile, TextId = file.TextId });
}
return files;
return dataFiles;
}

private CorpusDto Map(Corpus source)
Expand All @@ -216,14 +197,18 @@ private CorpusDto Map(Corpus source)

private CorpusFileDto Map(CorpusFile source)
{
return new CorpusFileDto
return new CorpusFileDto { File = Map(source.File), TextId = source.TextId };
}

private DataFileDto Map(DataFile source)
{
return new DataFileDto
{
TextId = source.TextId,
File = new ResourceLinkDto
{
Id = source.Id,
Url = _urlService.GetUrl(Endpoints.GetDataFile, new { id = source.Id })
}
Id = source.Id,
Url = _urlService.GetUrl(Endpoints.GetDataFile, new { id = source.Id }),
Name = source.Name,
Format = source.Format,
Revision = source.Revision
};
}
}
7 changes: 7 additions & 0 deletions src/Serval/src/Serval.DataFiles/Models/CorpusFile.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// NOTE(review): this file is added under Serval.DataFiles/Models but declares a Contracts namespace — confirm this is intended.
namespace Serval.Corpora.Contracts;

/// <summary>
/// Pairs a <see cref="DataFile"/> with an optional text id.
/// </summary>
public record CorpusFile
{
/// <summary>The data file. Must be set when the record is created.</summary>
public required DataFile File { get; init; }
/// <summary>The text id associated with the file, or <c>null</c> when none was supplied.</summary>
public string? TextId { get; init; }
}
3 changes: 3 additions & 0 deletions src/Serval/src/Serval.DataFiles/Usings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
global using Microsoft.Extensions.Logging;
global using Microsoft.Extensions.Options;
global using NSwag.Annotations;
global using Serval.Corpora.Contracts;
global using Serval.Corpora.Models;
global using Serval.Corpora.Services;
global using Serval.DataFiles.Consumers;
global using Serval.DataFiles.Contracts;
global using Serval.DataFiles.Models;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,4 +185,4 @@ enum TranslationSource {
TRANSLATION_SOURCE_PRIMARY = 0;
TRANSLATION_SOURCE_SECONDARY = 1;
TRANSLATION_SOURCE_HUMAN = 2;
}
}
7 changes: 7 additions & 0 deletions src/Serval/src/Serval.Shared/Contracts/CorpusFileResult.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace Serval.Shared.Contracts;

/// <summary>
/// Pairs a <see cref="DataFileResult"/> with a text id. Both values must be set when the record is created.
/// </summary>
public record CorpusFileResult
{
/// <summary>The data file result this entry refers to.</summary>
public required DataFileResult File { get; init; }
/// <summary>The text id associated with the file; unlike a nullable text id, this one is required.</summary>
public required string TextId { get; init; }
}
7 changes: 7 additions & 0 deletions src/Serval/src/Serval.Shared/Contracts/CorpusNotFound.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace Serval.Shared.Contracts;

/// <summary>
/// Carries the id and owner of a corpus.
/// </summary>
/// <remarks>
/// NOTE(review): presumably the reply sent when a corpus lookup finds nothing — confirm against the code that produces it.
/// </remarks>
public record CorpusNotFound
{
/// <summary>The id of the corpus.</summary>
public required string CorpusId { get; init; }
/// <summary>The owner associated with the corpus id.</summary>
public required string Owner { get; init; }
}
Loading

0 comments on commit bc055a1

Please sign in to comment.