Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps: update zlib to 337322d #48218

Merged
merged 1 commit into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions deps/zlib/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,10 @@ if (build_with_chromium) {

data = [ "google/test/data/" ]

if (is_ios) {
bundle_deps = [ "google:zlib_pak_bundle_data" ]
}

deps = [
":zlib",
"google:compression_utils",
Expand Down
10 changes: 8 additions & 2 deletions deps/zlib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS ON)

project(zlib C)

set(VERSION "1.2.13")
set(VERSION "1.2.13.1")

set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
Expand All @@ -22,6 +22,7 @@ check_include_file(stdint.h HAVE_STDINT_H)
check_include_file(stddef.h HAVE_STDDEF_H)

option(ENABLE_SIMD_OPTIMIZATIONS "Enable all SIMD optimizations" OFF)
option(ENABLE_SIMD_AVX512 "Enable SIMD AXV512 optimizations" OFF)

# TODO(cavalcantii): add support for other OSes (e.g. Android, fuchsia, osx)
# and architectures (e.g. Arm).
Expand All @@ -30,8 +31,13 @@ if (ENABLE_SIMD_OPTIMIZATIONS)
add_definitions(-DADLER32_SIMD_SSSE3)
add_definitions(-DINFLATE_CHUNK_READ_64LE)
add_definitions(-DCRC32_SIMD_SSE42_PCLMUL)
if (ENABLE_SIMD_AVX512)
add_definitions(-DCRC32_SIMD_AVX512_PCLMUL)
add_compile_options(-mvpclmulqdq -msse2 -mavx512f -mpclmul)
else()
add_compile_options(-msse4.2 -mpclmul)
endif()
add_definitions(-DDEFLATE_SLIDE_HASH_SSE2)
add_compile_options(-msse4.2 -mpclmul)
# Required by CPU features detection code.
add_definitions(-DX86_NOT_WINDOWS)
# Apparently some environments (e.g. CentOS) require to explicitly link
Expand Down
2 changes: 2 additions & 0 deletions deps/zlib/contrib/optimizations/inflate.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ int value;
struct inflate_state FAR *state;

if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
if (bits == 0)
return Z_OK;
state = (struct inflate_state FAR *)strm->state;
if (bits < 0) {
state->hold = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include <fuzzer/FuzzedDataProvider.h>

#include "third_party/zlib/zlib.h"
#include "zlib.h"

// Fuzzer builds often have NDEBUG set, so roll our own assert macro.
#define ASSERT(cond) \
Expand Down
9 changes: 9 additions & 0 deletions deps/zlib/cpu_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ int ZLIB_INTERNAL arm_cpu_enable_pmull = 0;
int ZLIB_INTERNAL x86_cpu_enable_sse2 = 0;
int ZLIB_INTERNAL x86_cpu_enable_ssse3 = 0;
int ZLIB_INTERNAL x86_cpu_enable_simd = 0;
int ZLIB_INTERNAL x86_cpu_enable_avx512 = 0;

#ifndef CPU_NO_SIMD

Expand Down Expand Up @@ -138,6 +139,10 @@ static void _cpu_check_features(void)
/* On x86 we simply use an instruction to check the CPU features.
* (i.e. CPUID).
*/
#ifdef CRC32_SIMD_AVX512_PCLMUL
#include <immintrin.h>
#include <xsaveintrin.h>
#endif
static void _cpu_check_features(void)
{
int x86_cpu_has_sse2;
Expand All @@ -164,6 +169,10 @@ static void _cpu_check_features(void)
x86_cpu_enable_simd = x86_cpu_has_sse2 &&
x86_cpu_has_sse42 &&
x86_cpu_has_pclmulqdq;

#ifdef CRC32_SIMD_AVX512_PCLMUL
x86_cpu_enable_avx512 = _xgetbv(0) & 0x00000040;
#endif
}
#endif
#endif
Expand Down
1 change: 1 addition & 0 deletions deps/zlib/cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ extern int arm_cpu_enable_pmull;
extern int x86_cpu_enable_sse2;
extern int x86_cpu_enable_ssse3;
extern int x86_cpu_enable_simd;
extern int x86_cpu_enable_avx512;

void cpu_check_features(void);
14 changes: 13 additions & 1 deletion deps/zlib/crc32.c
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,19 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
}

#endif
#if defined(CRC32_SIMD_SSE42_PCLMUL)
#if defined(CRC32_SIMD_AVX512_PCLMUL)
if (x86_cpu_enable_avx512 && len >= Z_CRC32_AVX512_MINIMUM_LENGTH) {
/* crc32 64-byte chunks */
z_size_t chunk_size = len & ~Z_CRC32_AVX512_CHUNKSIZE_MASK;
crc = ~crc32_avx512_simd_(buf, chunk_size, ~(uint32_t)crc);
/* check remaining data */
len -= chunk_size;
if (!len)
return crc;
/* Fall into the default crc32 for the remaining data. */
buf += chunk_size;
}
#elif defined(CRC32_SIMD_SSE42_PCLMUL)
if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) {
/* crc32 16-byte chunks */
z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK;
Expand Down
198 changes: 194 additions & 4 deletions deps/zlib/crc32_simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,207 @@
*/

#include "crc32_simd.h"

#if defined(CRC32_SIMD_SSE42_PCLMUL)
#if defined(CRC32_SIMD_AVX512_PCLMUL)

/*
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
* length must be at least 64, and a multiple of 16. Based on:
* crc32_avx512_simd_(): compute the crc32 of the buffer, where the buffer
* length must be at least 256, and a multiple of 64. Based on:
*
* "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
* V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0
*/

#include <emmintrin.h>
#include <smmintrin.h>
#include <wmmintrin.h>
#include <immintrin.h>

/*
 * crc32_avx512_simd_(): fold `len` bytes at `buf` into a 32-bit CRC using
 * 512-bit carry-less multiplies.
 *
 *   buf - input bytes. Only unaligned loads are used on it, so it needs no
 *         particular alignment.
 *   len - number of bytes to process. Must be at least 256 (the first four
 *         64-byte blocks are loaded unconditionally) and a multiple of 64
 *         (a tail shorter than 64 bytes is never read).
 *   crc - starting CRC state; it is XORed into the low 32 bits of the first
 *         block.
 *
 * Returns the 32-bit value left after the final Barrett reduction. This
 * function does not invert `crc` on entry or the result on exit; any such
 * pre/post-conditioning is left to the caller.
 */
uint32_t ZLIB_INTERNAL crc32_avx512_simd_(  /* AVX512+PCLMUL */
        const unsigned char *buf,
        z_size_t len,
        uint32_t crc)
{
    /*
     * Definitions of the bit-reflected domain constants k1,k2,k3,k4
     * are similar to those given at the end of the paper, and remaining
     * constants and CRC32+Barrett polynomials remain unchanged.
     *
     * Replace the index of x from 128 to 512. As follows:
     * k1 = ( x ^ ( 512 * 4 + 32 ) mod P(x) << 32 )' << 1 = 0x011542778a
     * k2 = ( x ^ ( 512 * 4 - 32 ) mod P(x) << 32 )' << 1 = 0x01322d1430
     * k3 = ( x ^ ( 512 + 32 ) mod P(x) << 32 )' << 1 = 0x0154442bd4
     * k4 = ( x ^ ( 512 - 32 ) mod P(x) << 32 )' << 1 = 0x01c6e41596
     *
     * k1k2 and k3k4 repeat their pair in every 128-bit lane because
     * _mm512_clmulepi64_epi128 multiplies lane by lane. k5k6 drives the
     * 128-bit folds of the final reduction, k7k8 the 64-bit step, and poly
     * the Barrett reduction.
     */
    static const uint64_t zalign(64) k1k2[] = { 0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430 };
    static const uint64_t zalign(64) k3k4[] = { 0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596 };
    static const uint64_t zalign(16) k5k6[] = { 0x01751997d0, 0x00ccaa009e };
    static const uint64_t zalign(16) k7k8[] = { 0x0163cd6124, 0x0000000000 };
    static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 };
    __m512i x0, x1, x2, x3, x4, x5, x6, x7, x8, y5, y6, y7, y8;
    __m128i a0, a1, a2, a3;

    /*
     * There's at least one block of 256.
     */
    x1 = _mm512_loadu_si512((__m512i *)(buf + 0x00));
    x2 = _mm512_loadu_si512((__m512i *)(buf + 0x40));
    x3 = _mm512_loadu_si512((__m512i *)(buf + 0x80));
    x4 = _mm512_loadu_si512((__m512i *)(buf + 0xC0));

    /*
     * Mix the incoming crc into the low 32 bits of the first block.
     *
     * The crc is inserted into an explicitly zeroed vector rather than
     * widened with _mm512_castsi128_si512(): that cast leaves the upper
     * 384 bits undefined, and any stray bits there would be XORed into the
     * input data and corrupt the checksum.
     */
    x1 = _mm512_xor_si512(
        x1,
        _mm512_inserti32x4(_mm512_setzero_si512(), _mm_cvtsi32_si128(crc), 0));

    x0 = _mm512_load_si512((__m512i *)k1k2);

    buf += 256;
    len -= 256;

    /*
     * Parallel fold blocks of 256, if any.
     */
    while (len >= 256)
    {
        x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
        x6 = _mm512_clmulepi64_epi128(x2, x0, 0x00);
        x7 = _mm512_clmulepi64_epi128(x3, x0, 0x00);
        x8 = _mm512_clmulepi64_epi128(x4, x0, 0x00);

        x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
        x2 = _mm512_clmulepi64_epi128(x2, x0, 0x11);
        x3 = _mm512_clmulepi64_epi128(x3, x0, 0x11);
        x4 = _mm512_clmulepi64_epi128(x4, x0, 0x11);

        y5 = _mm512_loadu_si512((__m512i *)(buf + 0x00));
        y6 = _mm512_loadu_si512((__m512i *)(buf + 0x40));
        y7 = _mm512_loadu_si512((__m512i *)(buf + 0x80));
        y8 = _mm512_loadu_si512((__m512i *)(buf + 0xC0));

        x1 = _mm512_xor_si512(x1, x5);
        x2 = _mm512_xor_si512(x2, x6);
        x3 = _mm512_xor_si512(x3, x7);
        x4 = _mm512_xor_si512(x4, x8);

        x1 = _mm512_xor_si512(x1, y5);
        x2 = _mm512_xor_si512(x2, y6);
        x3 = _mm512_xor_si512(x3, y7);
        x4 = _mm512_xor_si512(x4, y8);

        buf += 256;
        len -= 256;
    }

    /*
     * Fold into 512-bits.
     */
    x0 = _mm512_load_si512((__m512i *)k3k4);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x2);
    x1 = _mm512_xor_si512(x1, x5);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x3);
    x1 = _mm512_xor_si512(x1, x5);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x4);
    x1 = _mm512_xor_si512(x1, x5);

    /*
     * Single fold blocks of 64, if any.
     */
    while (len >= 64)
    {
        x2 = _mm512_loadu_si512((__m512i *)buf);

        x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
        x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
        x1 = _mm512_xor_si512(x1, x2);
        x1 = _mm512_xor_si512(x1, x5);

        buf += 64;
        len -= 64;
    }

    /*
     * Fold 512-bits to 384-bits.
     */
    a0 = _mm_load_si128((__m128i *)k5k6);

    a1 = _mm512_extracti32x4_epi32(x1, 0);
    a2 = _mm512_extracti32x4_epi32(x1, 1);

    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);

    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 384-bits to 256-bits.
     */
    a2 = _mm512_extracti32x4_epi32(x1, 2);
    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 256-bits to 128-bits.
     */
    a2 = _mm512_extracti32x4_epi32(x1, 3);
    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 128-bits to 64-bits.
     */
    a2 = _mm_clmulepi64_si128(a1, a0, 0x10);
    a3 = _mm_setr_epi32(~0, 0, ~0, 0);  /* keeps the low 32 bits of each 64-bit half */
    a1 = _mm_srli_si128(a1, 8);
    a1 = _mm_xor_si128(a1, a2);

    a0 = _mm_loadl_epi64((__m128i*)k7k8);
    a2 = _mm_srli_si128(a1, 4);
    a1 = _mm_and_si128(a1, a3);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Barrett reduce to 32-bits.
     */
    a0 = _mm_load_si128((__m128i*)poly);

    a2 = _mm_and_si128(a1, a3);
    a2 = _mm_clmulepi64_si128(a2, a0, 0x10);
    a2 = _mm_and_si128(a2, a3);
    a2 = _mm_clmulepi64_si128(a2, a0, 0x00);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Return the crc32: it sits in 32-bit element 1 of the reduced vector.
     */
    return _mm_extract_epi32(a1, 1);
}

#elif defined(CRC32_SIMD_SSE42_PCLMUL)

/*
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
* length must be at least 64, and a multiple of 16.
*/

#include <emmintrin.h>
#include <smmintrin.h>
#include <wmmintrin.h>
Expand Down
6 changes: 6 additions & 0 deletions deps/zlib/crc32_simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_(const unsigned char* buf,
z_size_t len,
uint32_t crc);

uint32_t ZLIB_INTERNAL crc32_avx512_simd_(const unsigned char* buf,
z_size_t len,
uint32_t crc);

/*
* crc32_sse42_simd_ buffer size constraints: see the use in zlib/crc32.c
* for computing the crc32 of an arbitrary length buffer.
*/
#define Z_CRC32_SSE42_MINIMUM_LENGTH 64
#define Z_CRC32_SSE42_CHUNKSIZE_MASK 15
#define Z_CRC32_AVX512_MINIMUM_LENGTH 256
#define Z_CRC32_AVX512_CHUNKSIZE_MASK 63

/*
* CRC32 checksums using ARMv8-a crypto instructions.
Expand Down
6 changes: 4 additions & 2 deletions deps/zlib/crc_folding.c
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,10 @@ unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s)
unsigned crc;
__m128i x_tmp0, x_tmp1, x_tmp2, crc_fold;

CRC_LOAD(s)
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
__m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);
__m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);
__m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);

/*
* k1
Expand Down Expand Up @@ -491,7 +494,6 @@ unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s)

crc = _mm_extract_epi32(xmm_crc3, 2);
return ~crc;
CRC_SAVE(s)
}

#endif /* CRC32_SIMD_SSE42_PCLMUL */
5 changes: 3 additions & 2 deletions deps/zlib/deflate.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
#endif

const char deflate_copyright[] =
" deflate 1.2.13 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
" deflate 1.2.13.1 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
/*
If you use the zlib library in a product, an acknowledgment is welcome
in the documentation of your product. If for some reason you cannot
Expand Down Expand Up @@ -774,7 +774,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen)

/* if not default parameters, return one of the conservative bounds */
if (s->w_bits != 15 || s->hash_bits != 8 + 7)
return (s->w_bits <= s->hash_bits ? fixedlen : storelen) + wraplen;
return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) +
wraplen;

/* default settings: return tight bound for that case -- ~0.03% overhead
plus a small constant */
Expand Down
Loading