Skip to content

Commit

Permalink
React to HW-api changes (#46)
Browse files (browse the repository at this point in the history)
* Updated CI to latest SDK-preview

* netcoreapp3.0 API changes

* Fixed win-build failure

Cf. #46 (comment)

* Fixed coverlet bug by using specific version

Cf. #46 (comment)
  • Loading branch information
gfoidl committed Dec 25, 2018
1 parent 4c8c69f commit 2534fb9
Show file tree
Hide file tree
Showing 14 changed files with 321 additions and 197 deletions.
2 changes: 1 addition & 1 deletion .azure/pipelines/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ variables:
CI_BUILD_NUMBER: $(Build.BuildId)
BRANCH_NAME: $(Build.SourceBranchName)
TAG_NAME: $(Build.SourceBranchName)
SDK_VERSION: 3.0.100-preview-009765
SDK_VERSION: 3.0.100-preview-009844

jobs:
- template: jobs/build_and_test.yml
Expand Down
3 changes: 2 additions & 1 deletion .azure/pipelines/jobs/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ jobs:
dotnet --list-sdks
echo "-------------------------------------------------"
dotnet tool install -g coverlet.console
# https://github.com/tonerdo/coverlet/issues/274
dotnet tool install -g coverlet.console --version 1.3.0
# Workaround
# https://github.com/Microsoft/azure-pipelines-tasks/issues/8291#issuecomment-441707116
export PATH="$PATH:$HOME/.dotnet/tools"
Expand Down
2 changes: 1 addition & 1 deletion global.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"sdk": {
"version": "3.0.100-preview-009765"
"version": "3.0.100-preview-009844"
}
}
9 changes: 4 additions & 5 deletions source/gfoidl.Base64/Internal/Avx2Helper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ internal static class Avx2Helper
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Write(Vector256<sbyte> vec, ref char dest)
{
// https://github.com/dotnet/coreclr/issues/21130
Vector256<sbyte> zero = Avx.SetZeroVector256<sbyte>();
Vector256<sbyte> zero = Vector256<sbyte>.Zero;

Vector256<sbyte> c0 = Avx2.UnpackLow(vec, zero);
Vector256<sbyte> c1 = Avx2.UnpackHigh(vec, zero);
Expand Down Expand Up @@ -46,14 +45,14 @@ public static Vector256<sbyte> Read(ref char src)
Vector256<short> c1 = Unsafe.As<char, Vector256<short>>(ref Unsafe.Add(ref src, 16));

Vector256<byte> t0 = Avx2.PackUnsignedSaturate(c0, c1);
Vector256<long> t1 = Avx2.Permute4x64(Avx.StaticCast<byte, long>(t0), 0b_11_01_10_00);
Vector256<long> t1 = Avx2.Permute4x64(t0.AsInt64(), 0b_11_01_10_00);

return Avx.StaticCast<long, sbyte>(t1);
return t1.AsSByte();
}
//---------------------------------------------------------------------
public static Vector256<sbyte> LessThan(Vector256<sbyte> left, Vector256<sbyte> right)
{
Vector256<sbyte> allOnes = Avx.SetAllVector256<sbyte>(-1);
Vector256<sbyte> allOnes = Vector256.Create((sbyte)-1);
return LessThan(left, right, allOnes);
}
//---------------------------------------------------------------------
Expand Down
44 changes: 34 additions & 10 deletions source/gfoidl.Base64/Internal/Base64Encoder/Base64Encoder.Decode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,8 @@ private static void Avx2Decode<T>(ref T src, ref byte destBytes, int sourceLengt
Vector256<sbyte> lutLo = s_avx_decodeLutLo;
Vector256<sbyte> lutShift = s_avx_decodeLutShift;
Vector256<sbyte> mask2F = s_avx_decodeMask2F;
Vector256<sbyte> shuffleConstant0 = Avx.StaticCast<int, sbyte>(Avx.SetAllVector256(0x01400140));
Vector256<short> shuffleConstant1 = Avx.StaticCast<int, short>(Avx.SetAllVector256(0x00011000));
Vector256<sbyte> shuffleConstant0 = Vector256.Create(0x01400140).AsSByte();
Vector256<short> shuffleConstant1 = Vector256.Create(0x00011000).AsInt16();
Vector256<sbyte> shuffleVec = s_avx_decodeShuffleVec;
Vector256<int> permuteVec = s_avx_decodePermuteVec;

Expand All @@ -377,11 +377,15 @@ private static void Avx2Decode<T>(ref T src, ref byte destBytes, int sourceLengt
throw new NotSupportedException(); // just in case new types are introduced in the future
}

#if NETCOREAPP3_0
Vector256<sbyte> hiNibbles = Avx2.And(Avx2.ShiftRightLogical(str.AsInt32(), 4).AsSByte(), mask2F);
#else
Vector256<sbyte> hiNibbles = Avx2.And(Avx.StaticCast<int, sbyte>(Avx2.ShiftRightLogical(Avx.StaticCast<sbyte, int>(str), 4)), mask2F);
#endif
Vector256<sbyte> loNibbles = Avx2.And(str, mask2F);
Vector256<sbyte> hi = Avx2.Shuffle(lutHi, hiNibbles);
Vector256<sbyte> lo = Avx2.Shuffle(lutLo, loNibbles);
Vector256<sbyte> zero = Avx.SetZeroVector256<sbyte>();
Vector256<sbyte> zero = Vector256<sbyte>.Zero;

// https://github.com/dotnet/coreclr/issues/21247
if (Avx2.MoveMask(Avx2.CompareGreaterThan(Avx2.And(lo, hi), zero)) != 0)
Expand All @@ -393,10 +397,17 @@ private static void Avx2Decode<T>(ref T src, ref byte destBytes, int sourceLengt
Vector256<sbyte> shift = Avx2.Shuffle(lutShift, Avx2.Add(eq2F, hiNibbles));
str = Avx2.Add(str, shift);

#if NETCOREAPP3_0
Vector256<short> merge_ab_and_bc = Avx2.MultiplyAddAdjacent(str.AsByte(), shuffleConstant0);
Vector256<int> @out = Avx2.MultiplyAddAdjacent(merge_ab_and_bc, shuffleConstant1);
@out = Avx2.Shuffle(@out.AsSByte(), shuffleVec).AsInt32();
str = Avx2.PermuteVar8x32(@out, permuteVec).AsSByte();
#else
Vector256<short> merge_ab_and_bc = Avx2.MultiplyAddAdjacent(Avx.StaticCast<sbyte, byte>(str), shuffleConstant0);
Vector256<int> @out = Avx2.MultiplyAddAdjacent(merge_ab_and_bc, shuffleConstant1);
@out = Avx.StaticCast<sbyte, int>(Avx2.Shuffle(Avx.StaticCast<int, sbyte>(@out), shuffleVec));
str = Avx.StaticCast<int, sbyte>(Avx2.PermuteVar8x32(@out, permuteVec));
#endif

// As has better CQ than WriteUnaligned
// https://github.com/dotnet/coreclr/issues/21132
Expand Down Expand Up @@ -432,8 +443,13 @@ private static void Sse2Decode<T>(ref T src, ref byte destBytes, int sourceLengt
Vector128<sbyte> lutLo = s_sse_decodeLutLo;
Vector128<sbyte> lutShift = s_sse_decodeLutShift;
Vector128<sbyte> mask2F = s_sse_decodeMask2F;
#if NETCOREAPP3_0
Vector128<sbyte> shuffleConstant0 = Vector128.Create(0x01400140).AsSByte();
Vector128<short> shuffleConstant1 = Vector128.Create(0x00011000).AsInt16();
#else
Vector128<sbyte> shuffleConstant0 = Sse.StaticCast<int, sbyte>(Sse2.SetAllVector128(0x01400140));
Vector128<short> shuffleConstant1 = Sse.StaticCast<int, short>(Sse2.SetAllVector128(0x00011000));
#endif
Vector128<sbyte> shuffleVec = s_sse_decodeShuffleVec;

//while (remaining >= 24)
Expand All @@ -450,21 +466,27 @@ private static void Sse2Decode<T>(ref T src, ref byte destBytes, int sourceLengt
Vector128<short> c0 = Unsafe.As<T, Vector128<short>>(ref src);
Vector128<short> c1 = Unsafe.As<T, Vector128<short>>(ref Unsafe.Add(ref src, 8));

#if NETCOREAPP3_0
str = Sse2.PackUnsignedSaturate(c0, c1).AsSByte();
#else
str = Sse.StaticCast<byte, sbyte>(Sse2.PackUnsignedSaturate(c0, c1));
#endif
}
else
{
throw new NotSupportedException(); // just in case new types are introduced in the future
}

#if NETCOREAPP3_0
Vector128<sbyte> hiNibbles = Sse2.And(Sse2.ShiftRightLogical(str.AsInt32(), 4).AsSByte(), mask2F);
#else
Vector128<sbyte> hiNibbles = Sse2.And(Sse.StaticCast<int, sbyte>(Sse2.ShiftRightLogical(Sse.StaticCast<sbyte, int>(str), 4)), mask2F);
#endif
Vector128<sbyte> loNibbles = Sse2.And(str, mask2F);
Vector128<sbyte> hi = Ssse3.Shuffle(lutHi, hiNibbles);
Vector128<sbyte> lo = Ssse3.Shuffle(lutLo, loNibbles);
#if NETCOREAPP3_0
// https://github.com/dotnet/coreclr/issues/21130
//Vector128<sbyte> zero = Vector128<sbyte>.Zero;
Vector128<sbyte> zero = Sse2.SetZeroVector128<sbyte>();
Vector128<sbyte> zero = Vector128<sbyte>.Zero;
#elif NETCOREAPP2_1
Vector128<sbyte> zero = Sse2.SetZeroVector128<sbyte>();
#endif
Expand All @@ -477,13 +499,15 @@ private static void Sse2Decode<T>(ref T src, ref byte destBytes, int sourceLengt
Vector128<sbyte> shift = Ssse3.Shuffle(lutShift, Sse2.Add(eq2F, hiNibbles));
str = Sse2.Add(str, shift);

Vector128<short> merge_ab_and_bc = Ssse3.MultiplyAddAdjacent(Sse.StaticCast<sbyte, byte>(str), shuffleConstant0);
#if NETCOREAPP3_0
Vector128<int> @out = Sse2.MultiplyAddAdjacent(merge_ab_and_bc, shuffleConstant1);
Vector128<short> merge_ab_and_bc = Ssse3.MultiplyAddAdjacent(str.AsByte(), shuffleConstant0);
Vector128<int> @out = Sse2.MultiplyAddAdjacent(merge_ab_and_bc, shuffleConstant1);
str = Ssse3.Shuffle(@out.AsSByte(), shuffleVec);
#elif NETCOREAPP2_1
Vector128<int> @out = Sse2.MultiplyHorizontalAdd(merge_ab_and_bc, shuffleConstant1);
Vector128<short> merge_ab_and_bc = Ssse3.MultiplyAddAdjacent(Sse.StaticCast<sbyte, byte>(str), shuffleConstant0);
Vector128<int> @out = Sse2.MultiplyHorizontalAdd(merge_ab_and_bc, shuffleConstant1);
str = Ssse3.Shuffle(Sse.StaticCast<int, sbyte>(@out), shuffleVec);
#endif
str = Ssse3.Shuffle(Sse.StaticCast<int, sbyte>(@out), shuffleVec);

// As has better CQ than WriteUnaligned
// https://github.com/dotnet/coreclr/issues/21132
Expand Down
61 changes: 40 additions & 21 deletions source/gfoidl.Base64/Internal/Base64Encoder/Base64Encoder.Encode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -250,36 +250,34 @@ private static void Avx2Encode<T>(ref byte src, ref T dest, int sourceLength, re

// The JIT won't hoist these "constants", so hoist them manually into locals
Vector256<sbyte> shuffleVec = s_avx_encodeShuffleVec;
Vector256<sbyte> shuffleConstant0 = Avx.StaticCast<int, sbyte>(Avx.SetAllVector256(0x0fc0fc00));
Vector256<sbyte> shuffleConstant2 = Avx.StaticCast<int, sbyte>(Avx.SetAllVector256(0x003f03f0));
Vector256<ushort> shuffleConstant1 = Avx.StaticCast<int, ushort>(Avx.SetAllVector256(0x04000040));
Vector256<short> shuffleConstant3 = Avx.StaticCast<int, short>(Avx.SetAllVector256(0x01000010));
Vector256<byte> translationContant0 = Avx.SetAllVector256((byte)51);
Vector256<sbyte> translationContant1 = Avx.SetAllVector256((sbyte)25);
Vector256<sbyte> shuffleConstant0 = Vector256.Create(0x0fc0fc00).AsSByte();
Vector256<sbyte> shuffleConstant2 = Vector256.Create(0x003f03f0).AsSByte();
Vector256<ushort> shuffleConstant1 = Vector256.Create(0x04000040).AsUInt16();
Vector256<short> shuffleConstant3 = Vector256.Create(0x01000010).AsInt16();
Vector256<byte> translationContant0 = Vector256.Create((byte)51);
Vector256<sbyte> translationContant1 = Vector256.Create((sbyte)25);
Vector256<sbyte> lut = s_avx_encodeLut;

// first load is done at c-0 not to get a segfault
Vector256<sbyte> str = Unsafe.ReadUnaligned<Vector256<sbyte>>(ref src);

// shift by 4 bytes, as required by enc_reshuffle
str = Avx.StaticCast<int, sbyte>(Avx2.PermuteVar8x32(
Avx.StaticCast<sbyte, int>(str),
s_avx_encodePermuteVec));
str = Avx2.PermuteVar8x32(str.AsInt32(), s_avx_encodePermuteVec).AsSByte();

while (true)
{
// Reshuffle
str = Avx2.Shuffle(str, shuffleVec);
Vector256<sbyte> t0 = Avx2.And(str, shuffleConstant0);
Vector256<sbyte> t2 = Avx2.And(str, shuffleConstant2);
Vector256<ushort> t1 = Avx2.MultiplyHigh(Avx.StaticCast<sbyte, ushort>(t0), shuffleConstant1);
Vector256<short> t3 = Avx2.MultiplyLow(Avx.StaticCast<sbyte, short>(t2), shuffleConstant3);
str = Avx2.Or(Avx.StaticCast<ushort, sbyte>(t1), Avx.StaticCast<short, sbyte>(t3));
Vector256<ushort> t1 = Avx2.MultiplyHigh(t0.AsUInt16(), shuffleConstant1);
Vector256<short> t3 = Avx2.MultiplyLow(t2.AsInt16(), shuffleConstant3);
str = Avx2.Or(t1.AsSByte(), t3.AsSByte());

// Translation
Vector256<byte> indices = Avx2.SubtractSaturate(Avx.StaticCast<sbyte, byte>(str), translationContant0);
Vector256<byte> indices = Avx2.SubtractSaturate(str.AsByte(), translationContant0);
Vector256<sbyte> mask = Avx2.CompareGreaterThan(str, translationContant1);
Vector256<sbyte> tmp = Avx2.Subtract(Avx.StaticCast<byte, sbyte>(indices), mask);
Vector256<sbyte> tmp = Avx2.Subtract(indices.AsSByte(), mask);
str = Avx2.Add(str, Avx2.Shuffle(lut, tmp));

if (typeof(T) == typeof(byte))
Expand Down Expand Up @@ -337,12 +335,21 @@ private static void Sse2Encode<T>(ref byte src, ref T dest, int sourceLength, re

// The JIT won't hoist these "constants", so hoist them manually into locals
Vector128<sbyte> shuffleVec = s_sse_encodeShuffleVec;
Vector128<sbyte> shuffleConstant0 = Sse.StaticCast<int, sbyte>(Sse2.SetAllVector128(0x0fc0fc00));
Vector128<sbyte> shuffleConstant2 = Sse.StaticCast<int, sbyte>(Sse2.SetAllVector128(0x003f03f0));
#if NETCOREAPP3_0
Vector128<sbyte> shuffleConstant0 = Vector128.Create(0x0fc0fc00).AsSByte();
Vector128<sbyte> shuffleConstant2 = Vector128.Create(0x003f03f0).AsSByte();
Vector128<ushort> shuffleConstant1 = Vector128.Create(0x04000040).AsUInt16();
Vector128<short> shuffleConstant3 = Vector128.Create(0x01000010).AsInt16();
Vector128<byte> translationContant0 = Vector128.Create((byte) 51);
Vector128<sbyte> translationContant1 = Vector128.Create((sbyte)25);
#else
Vector128<sbyte> shuffleConstant0 = Sse.StaticCast<int, sbyte> (Sse2.SetAllVector128(0x0fc0fc00));
Vector128<sbyte> shuffleConstant2 = Sse.StaticCast<int, sbyte> (Sse2.SetAllVector128(0x003f03f0));
Vector128<ushort> shuffleConstant1 = Sse.StaticCast<int, ushort>(Sse2.SetAllVector128(0x04000040));
Vector128<short> shuffleConstant3 = Sse.StaticCast<int, short>(Sse2.SetAllVector128(0x01000010));
Vector128<byte> translationContant0 = Sse2.SetAllVector128((byte)51);
Vector128<short> shuffleConstant3 = Sse.StaticCast<int, short> (Sse2.SetAllVector128(0x01000010));
Vector128<byte> translationContant0 = Sse2.SetAllVector128((byte) 51);
Vector128<sbyte> translationContant1 = Sse2.SetAllVector128((sbyte)25);
#endif
Vector128<sbyte> lut = s_sse_encodeLut;

//while (remaining >= 16)
Expand All @@ -354,14 +361,27 @@ private static void Sse2Encode<T>(ref byte src, ref T dest, int sourceLength, re
str = Ssse3.Shuffle(str, shuffleVec);
Vector128<sbyte> t0 = Sse2.And(str, shuffleConstant0);
Vector128<sbyte> t2 = Sse2.And(str, shuffleConstant2);

#if NETCOREAPP3_0
Vector128<ushort> t1 = Sse2.MultiplyHigh(t0.AsUInt16(), shuffleConstant1);
Vector128<short> t3 = Sse2.MultiplyLow(t2.AsInt16(), shuffleConstant3);
str = Sse2.Or(t1.AsSByte(), t3.AsSByte());
#else
Vector128<ushort> t1 = Sse2.MultiplyHigh(Sse.StaticCast<sbyte, ushort>(t0), shuffleConstant1);
Vector128<short> t3 = Sse2.MultiplyLow(Sse.StaticCast<sbyte, short>(t2), shuffleConstant3);
str = Sse2.Or(Sse.StaticCast<ushort, sbyte>(t1), Sse.StaticCast<short, sbyte>(t3));
#endif

// Translation
#if NETCOREAPP3_0
Vector128<byte> indices = Sse2.SubtractSaturate(str.AsByte(), translationContant0);
Vector128<sbyte> mask = Sse2.CompareGreaterThan(str, translationContant1);
Vector128<sbyte> tmp = Sse2.Subtract(indices.AsSByte(), mask);
#else
Vector128<byte> indices = Sse2.SubtractSaturate(Sse.StaticCast<sbyte, byte>(str), translationContant0);
Vector128<sbyte> mask = Sse2.CompareGreaterThan(str, translationContant1);
Vector128<sbyte> tmp = Sse2.Subtract(Sse.StaticCast<byte, sbyte>(indices), mask);
#endif
str = Sse2.Add(str, Ssse3.Shuffle(lut, tmp));

if (typeof(T) == typeof(byte))
Expand All @@ -374,12 +394,11 @@ private static void Sse2Encode<T>(ref byte src, ref T dest, int sourceLength, re
{
#if NETCOREAPP3_0
// https://github.com/dotnet/coreclr/issues/21130
//Vector128<sbyte> zero = Vector128<sbyte>.Zero;
Vector128<sbyte> zero = Sse2.SetZeroVector128<sbyte>();
Vector128<sbyte> zero = Vector128<sbyte>.Zero;
#else
Vector128<sbyte> zero = Sse2.SetZeroVector128<sbyte>();
#endif
Vector128<sbyte> c0 = Sse2.UnpackLow(str, zero);
Vector128<sbyte> c0 = Sse2.UnpackLow(str , zero);
Vector128<sbyte> c1 = Sse2.UnpackHigh(str, zero);

// As has better CQ than WriteUnaligned
Expand Down
Loading

0 comments on commit 2534fb9

Please sign in to comment.