Skip to content

Commit

Permalink
Remove use of inline ASM in insert_string_sse
Browse files Browse the repository at this point in the history
It seems that some years ago clang@Windows didn't have the
proper intrinsic required, which prompted the use of inline
ASM.

It has a side effect in that it will allow compilation of the
optimized function within the same compilation unit while using regular
compiler flags (i.e. 'crc32' instruction on x86 requires some special
compiler flags).

Main issue is that inline ASM is blocked on dependencies (e.g. 'base')
that will be linked to NaCl.

The main idea here is to allow the whole Chromium code base to use the
highly optimized checksums in zlib (e.g. crc32 and Adler-32), exported
through an interface (i.e. base::Crc32()).

This patch fixes this issue by removing the use of inline ASM.

The workaround is to use clang/gcc 'target attributes' to instruct the
backend to use different code generation options for the optimized
function, see:
https://clang.llvm.org/docs/AttributeReference.html#target

Bug: 902789
Change-Id: I0d139268aefb8335310c0e3f6533006be9af6470
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1931272
Reviewed-by: Adenilson Cavalcanti <cavalcantii@chromium.org>
Commit-Queue: Adenilson Cavalcanti <cavalcantii@chromium.org>
Cr-Commit-Position: refs/heads/master@{#718788}
  • Loading branch information
Adenilson Cavalcanti authored and Commit Bot committed Nov 25, 2019
1 parent 3de04d8 commit ea6b928
Showing 1 changed file with 26 additions and 37 deletions.
63 changes: 26 additions & 37 deletions third_party/zlib/deflate.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,30 @@ extern void ZLIB_INTERNAL copy_with_crc(z_streamp strm, Bytef *dst, long size);
#define INLINE inline
#endif

/* Inline optimisation */
local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str);
/* Intel optimized insert_string. */
#if defined(CRC32_SIMD_SSE42_PCLMUL)
#define _mm_crc32_u32 __builtin_ia32_crc32si
#define TARGET_INTEL_WITH_CRC __attribute__((target("sse4.2")))
TARGET_INTEL_WITH_CRC
local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
{
Pos ret;
unsigned *ip, val, h = 0;

ip = (unsigned *)&s->window[str];
val = *ip;

if (s->level >= 6)
val &= 0xFFFFFF;

h = _mm_crc32_u32(h, val);

ret = s->head[h & s->hash_mask];
s->head[h & s->hash_mask] = str;
s->prev[str & s->w_mask] = ret;
return ret;
}
#endif

/* ===========================================================================
* Local data
Expand Down Expand Up @@ -228,9 +250,10 @@ local INLINE Pos insert_string(deflate_state *const s, const Pos str)
#if defined(CRC32_ARMV8_CRC32)
if (arm_cpu_enable_crc32)
return insert_string_arm(s, str);
#endif
#elif defined(CRC32_SIMD_SSE42_PCLMUL)
if (x86_cpu_enable_simd)
return insert_string_sse(s, str);
#endif
#endif
return insert_string_c(s, str);
}
Expand Down Expand Up @@ -2276,37 +2299,3 @@ local block_state deflate_huff(s, flush)
FLUSH_BLOCK(s, 0);
return block_done;
}

/* Safe to inline this as GCC/clang will use inline asm and Visual Studio will
* use intrinsic without extra params
*/
local INLINE Pos insert_string_sse(deflate_state *const s, const Pos str)
{
Pos ret;
unsigned *ip, val, h = 0;

ip = (unsigned *)&s->window[str];
val = *ip;

if (s->level >= 6)
val &= 0xFFFFFF;

/* Windows clang should use inline asm */
#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64))
h = _mm_crc32_u32(h, val);
#elif defined(__i386__) || defined(__amd64__)
__asm__ __volatile__ (
"crc32 %1,%0\n\t"
: "+r" (h)
: "r" (val)
);
#else
/* This should never happen */
assert(0);
#endif

ret = s->head[h & s->hash_mask];
s->head[h & s->hash_mask] = str;
s->prev[str & s->w_mask] = ret;
return ret;
}

0 comments on commit ea6b928

Please sign in to comment.