Skip to content

Commit

Permalink
Fix non-clang build (probably)
Browse files Browse the repository at this point in the history
Fix some mistakes
  • Loading branch information
kg committed Mar 21, 2024
1 parent 89e4aee commit f0a1a04
Showing 1 changed file with 25 additions and 5 deletions.
30 changes: 25 additions & 5 deletions src/mono/mono/metadata/metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -1495,7 +1495,9 @@ mono_metadata_decode_row_col_raw (const MonoTableInfo *t, int idx, guint col)
return 0;
}

static MONO_ALWAYS_INLINE guint32
// This will inline into mono_metadata_decode_value_simd on targets that can't use
// the simd version of the algorithm.
MONO_ALWAYS_INLINE static guint32
decode_value_scalar (const guint8 *ptr, const guint8 **new_ptr)
{
guint32 result;
Expand All @@ -1519,9 +1521,13 @@ decode_value_scalar (const guint8 *ptr, const guint8 **new_ptr)
ptr += 5;
}

if (new_ptr)
*new_ptr = ptr;

return result;
}

// This is meant to be wrapped by our existing decode_value functions, and inline into them.
MONO_ALWAYS_INLINE guint32
mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr)
{
Expand All @@ -1531,7 +1537,7 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr)
typedef guint8 v64_u1 __attribute__ ((vector_size (8)));
typedef guint32 v64_u4 __attribute__ ((vector_size (8)));

// this will generate vectorized code on x64 and wasm as long as SIMD is enabled
// this will generate vectorized code on x64 and wasm as long as SIMD is enabled at build time.
// if simd isn't enabled, it generates fairly adequate scalar code.
// *(bytes *)ptr and *(guint32 *)ptr by themselves don't force an i32 load of
// ptr in either x64 or wasm clang, so this is the only way to prefetch all the bytes
Expand All @@ -1540,13 +1546,15 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr)
// we could overrun the source buffer by up to 7 bytes, but doing that on wasm is
// safe unless we're decoding from the absolute end of memory.
// we pad all buffers by 16 bytes in mono_wasm_load_bytes_into_heap, so we're fine
// clang happily upsizes these 8-byte vectors into 16-byte ones for wasm, and uses 8-byte
// clang happily upsizes these 8-byte vectors into 16-byte ones for wasm, and uses 8-byte-wide
// insns on x64 as appropriate. armv8 looks fine too, albeit a little weird
union {
v64_u1 b;
v64_u4 i;
} v;
v.b = *(v64_u1 *)ptr;
// ideally we would load 5 bytes, but it won't use a vector load then
// memcpy instead of a regular pointer dereference, to say 'this is unaligned'
memcpy(&v, ptr, sizeof(v));
// mask and shift two bits so we can have a 4-element jump table in wasm
guint8 flags = (v.b[0] & (0x80u | 0x40u)) >> 6;
switch (flags) {
Expand All @@ -1571,8 +1579,13 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr)
// i don't know why the default case is necessary here, but without it the jump table has 5 entries.
default:
// (b * 0x80) != 0, and (b & 0x40) != 0
// for some reason on wasm the 'v.b[0]' load generates an '& 255',
// even if we cache it in a guint8 local
if (v.b[0] == 0xFFu) {
// v.b = { ptr[4], ptr[3], ptr[2], ptr[1] }
// on x64 this generates kind of gross code, i.e.
// pshufd, pshufhw, pshufd, pshuflw, packuswb
// but on wasm it's fine
v.b = __builtin_shufflevector(
v.b, v.b,
4, 3, 2, 1, -1, -1, -1, -1
Expand All @@ -1582,12 +1595,19 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr)
ptr += 5;
} else {
// v.b = { ptr[3], ptr[2], ptr[1], ptr[0] }
#ifdef USE_BSWAP
// generates much smaller x64 assembly, but terrible wasm
// interestingly, clang for arm automatically does this to the shuffle below
result = __builtin_bswap32(v.i[0]) & 0x1FFFFFFFu;
#else
// on x64 unlike the above this generates a single pshuflw + pack
v.b = __builtin_shufflevector(
v.b, v.b,
3, 2, 1, 0, -1, -1, -1, -1
);
// result = v.b[0..3] where v.b[0] &= 0x1F
result = v.i[0] & 0x1FFFFFFFu;
#endif
ptr += 4;
}
break;
Expand All @@ -1598,7 +1618,7 @@ mono_metadata_decode_value_simd (const guint8 *ptr, const guint8 **new_ptr)

return result;
#else
return decode_value_scalar (ptr, rptr);
return decode_value_scalar (ptr, new_ptr);
#endif
}

Expand Down

0 comments on commit f0a1a04

Please sign in to comment.