-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix bug with pretranslating all; begin porting tests to toolkit
- Loading branch information
Showing 10 changed files with 220 additions and 31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
32 changes: 32 additions & 0 deletions
32
src/ServiceToolkit/test/SIL.ServiceToolkit/SIL.ServiceToolkit.Tests.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Compiler and framework settings for this project. -->
  <PropertyGroup>
    <TargetFramework>net8.0</TargetFramework>
    <Nullable>enable</Nullable>
    <ImplicitUsings>enable</ImplicitUsings>
  </PropertyGroup>

  <!-- Test host, test framework and mocking library. -->
  <ItemGroup>
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
    <PackageReference Include="NUnit" Version="4.0.1" />
    <PackageReference Include="NUnit3TestAdapter" Version="4.5.0" />
    <PackageReference Include="NSubstitute" Version="5.1.0" />
  </ItemGroup>

  <!-- Coverage collector and analyzers; each is referenced with PrivateAssets="all". -->
  <ItemGroup>
    <PackageReference
      Include="coverlet.collector"
      Version="6.0.0"
      PrivateAssets="all"
      IncludeAssets="runtime; build; native; contentfiles; analyzers; buildtransitive" />
    <PackageReference
      Include="NSubstitute.Analyzers.CSharp"
      Version="1.0.16"
      PrivateAssets="all"
      IncludeAssets="runtime; build; native; contentfiles; analyzers; buildtransitive" />
    <PackageReference
      Include="NUnit.Analyzers"
      Version="4.0.0"
      PrivateAssets="all"
      IncludeAssets="runtime; build; native; contentfiles; analyzers; buildtransitive" />
  </ItemGroup>

  <!-- The project whose code these tests reference. -->
  <ItemGroup>
    <ProjectReference Include="../../src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj" />
  </ItemGroup>

</Project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
global using NUnit.Framework; | ||
global using SIL.ServiceToolkit.Models; |
96 changes: 96 additions & 0 deletions
96
src/ServiceToolkit/test/SIL.ServiceToolkit/Utils/ParallelCorpusProcessorTests.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
namespace SIL.ServiceToolkit.Utils; | ||
|
||
/// <summary>
/// Tests for <c>ParallelCorpusPreprocessor</c> that run against the text fixtures
/// located under <see cref="TestDataPath"/>.
/// </summary>
[TestFixture]
public class ParallelCorpusPreprocessorTests
{
    // Fixture directory: three levels above the application base directory, then "Utils/data".
    private static readonly string TestDataPath = Path.Combine(
        AppContext.BaseDirectory, "..", "..", "..", "Utils", "data");

    /// <summary>
    /// Builds a monolingual corpus with language "en" containing a single text-format file
    /// (text id "textId1") that points at <paramref name="fileName"/> inside the fixture directory.
    /// </summary>
    /// <param name="corpusId">Identifier assigned to the corpus.</param>
    /// <param name="fileName">Name of the fixture file, relative to <see cref="TestDataPath"/>.</param>
    private static MonolingualCorpus CreateEnglishTextCorpus(string corpusId, string fileName)
    {
        var file = new CorpusFile
        {
            TextId = "textId1",
            Format = FileFormat.Text,
            Location = Path.Combine(TestDataPath, fileName)
        };

        return new MonolingualCorpus
        {
            Id = corpusId,
            Language = "en",
            Files = new List<CorpusFile> { file }
        };
    }

    /// <summary>
    /// Runs <c>Preprocess</c> over one parallel corpus made of two source corpora and one target
    /// corpus, counting (a) rows handed to the first callback whose source and target segments are
    /// both non-empty and (b) every invocation of the second callback.
    /// </summary>
    [Test]
    public void TestParallelCorpusPreprocessor()
    {
        var preprocessor = new ParallelCorpusPreprocessor();

        var corpora = new List<ParallelCorpus>
        {
            new()
            {
                Id = "corpus1",
                SourceCorpora = new List<MonolingualCorpus>
                {
                    CreateEnglishTextCorpus("source-corpus1", "source1.txt"),
                    CreateEnglishTextCorpus("source-corpus2", "source2.txt")
                },
                TargetCorpora = new List<MonolingualCorpus>
                {
                    CreateEnglishTextCorpus("target-corpus1", "target1.txt")
                }
            }
        };

        int populatedTrainRows = 0;
        int pretranslateCalls = 0;

        preprocessor.Preprocess(
            corpora,
            row =>
            {
                // Only rows with content on both sides are counted.
                bool hasBothSegments = row.SourceSegment.Length > 0 && row.TargetSegment.Length > 0;
                if (hasBothSegments)
                {
                    populatedTrainRows++;
                }
            },
            (_, _) =>
            {
                // Row and corpus are irrelevant here; only the number of calls matters.
                pretranslateCalls++;
            },
            // NOTE(review): the meaning of this flag is not visible in this file; confirm against Preprocess.
            false
        );

        Assert.Multiple(() =>
        {
            Assert.That(populatedTrainRows, Is.EqualTo(2));
            Assert.That(pretranslateCalls, Is.EqualTo(3));
        });
    }
}
7 changes: 7 additions & 0 deletions
7
src/ServiceToolkit/test/SIL.ServiceToolkit/Utils/data/source1.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Source one, Line 1 | ||
Source one, Line 2 | ||
|
||
Source one, Line 4 | ||
|
||
Source one, Line 6 | ||
|
7 changes: 7 additions & 0 deletions
7
src/ServiceToolkit/test/SIL.ServiceToolkit/Utils/data/source2.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Source two, Line 1 | ||
Source two, Line 2 | ||
|
||
Source two, Line 4 | ||
Source two, Line 5 | ||
Source two, Line 6 | ||
|
7 changes: 7 additions & 0 deletions
7
src/ServiceToolkit/test/SIL.ServiceToolkit/Utils/data/target1.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Target one, Line 1 | ||
|
||
|
||
Target one, Line 4 | ||
|
||
|
||
Target one, Line 7 |