Skip to content

Commit

Permalink
Add vectorized f32-vlog microkernels.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 645304919
  • Loading branch information
gonnet authored and xnnpack-bot committed Jun 28, 2024
1 parent 08f1489 commit 00ddbe4
Show file tree
Hide file tree
Showing 56 changed files with 11,508 additions and 39 deletions.
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ MICROKERNEL_DEPS = [

SIMD_HEADERS = ["src/xnnpack/simd/f32-" + arch + ".h" for arch in xnnpack_simd_archs()] + [
"src/xnnpack/simd/f32-avx-base.h",
"src/xnnpack/simd/f32-generic-functions.h",
]

exports_files(SIMD_HEADERS)
Expand Down
1 change: 1 addition & 0 deletions bench/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,7 @@ xnnpack_benchmark(
"f32_vabs",
"f32_velu",
"f32_vhswish",
"f32_vlog",
"f32_vlrelu",
"f32_vneg",
"f32_vrelu",
Expand Down
219 changes: 219 additions & 0 deletions bench/f32-vlog.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// Copyright 2024 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/f32-vlog.yaml
// Generator: tools/generate-vunary-benchmark.py

#include <stddef.h>
#include <stdint.h>

#include "xnnpack.h"
#include "xnnpack/aligned-allocator.h"
#include "xnnpack/common.h"
#include "xnnpack/microfnptr.h"
#include "xnnpack/microparams-init.h"
#include "xnnpack/microparams.h"
#include "xnnpack/vunary.h"

#include "bench/f32-vunary-benchmark.h"
#include "bench/utils.h"
#include <benchmark/benchmark.h>

// Benchmark entry point shared by every f32 vlog microkernel registered in
// this file.
//
// Forwards the kernel under test, the optional params initializer and the
// optional ISA check to f32_vunary_benchmark, requesting the input range
// [0.0, 10.0].
//
//   state        benchmark state supplied by the benchmark framework.
//   ukernel      the vlog microkernel to measure.
//   init_params  params initializer; defaults to nullptr.
//   isa_check    ISA check function; defaults to nullptr.
void f32_vlog(benchmark::State& state, xnn_f32_vlog_ukernel_fn ukernel,
              xnn_init_f32_default_params_fn init_params = nullptr,
              benchmark::utils::IsaCheckFunction isa_check = nullptr) {
  // Bounds handed to the shared unary benchmark as range_min / range_max.
  constexpr double kInputRangeMin = 0.0;
  constexpr double kInputRangeMax = 10.0;

  f32_vunary_benchmark<xnn_f32_default_params>(
      state, ukernel, init_params, isa_check, kInputRangeMin, kInputRangeMax);
}

// Scalar microkernels. These are registered outside any architecture guard
// and pass no ISA check, so f32_vlog's isa_check keeps its nullptr default.
// Two kernel families are registered: `scalar_log` (u1, u2, u4) and
// `scalar_rational_3_3_div` (u1, u2, u4, u8).
BENCHMARK_CAPTURE(f32_vlog, scalar_log_u1,
xnn_f32_vlog_ukernel__scalar_log_u1,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, scalar_log_u2,
xnn_f32_vlog_ukernel__scalar_log_u2,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, scalar_log_u4,
xnn_f32_vlog_ukernel__scalar_log_u4,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, scalar_rational_3_3_div_u1,
xnn_f32_vlog_ukernel__scalar_rational_3_3_div_u1,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, scalar_rational_3_3_div_u2,
xnn_f32_vlog_ukernel__scalar_rational_3_3_div_u2,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, scalar_rational_3_3_div_u4,
xnn_f32_vlog_ukernel__scalar_rational_3_3_div_u4,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, scalar_rational_3_3_div_u8,
xnn_f32_vlog_ukernel__scalar_rational_3_3_div_u8,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// x86 / x86-64 microkernels, compiled only when XNN_ARCH_X86 or
// XNN_ARCH_X86_64 is set. All of them are `rational_3_3_div` variants.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// SSE2 (u4, u8, u12, u16): registered without an ISA check function.
BENCHMARK_CAPTURE(f32_vlog, sse2_rational_3_3_div_u4,
xnn_f32_vlog_ukernel__sse2_rational_3_3_div_u4,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, sse2_rational_3_3_div_u8,
xnn_f32_vlog_ukernel__sse2_rational_3_3_div_u8,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, sse2_rational_3_3_div_u12,
xnn_f32_vlog_ukernel__sse2_rational_3_3_div_u12,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, sse2_rational_3_3_div_u16,
xnn_f32_vlog_ukernel__sse2_rational_3_3_div_u16,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// AVX2 (u8, u16, u24, u32): benchmark::utils::CheckAVX2 is passed as isa_check.
BENCHMARK_CAPTURE(f32_vlog, avx2_rational_3_3_div_u8,
xnn_f32_vlog_ukernel__avx2_rational_3_3_div_u8,
/*init_params=*/nullptr,
benchmark::utils::CheckAVX2)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, avx2_rational_3_3_div_u16,
xnn_f32_vlog_ukernel__avx2_rational_3_3_div_u16,
/*init_params=*/nullptr,
benchmark::utils::CheckAVX2)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, avx2_rational_3_3_div_u24,
xnn_f32_vlog_ukernel__avx2_rational_3_3_div_u24,
/*init_params=*/nullptr,
benchmark::utils::CheckAVX2)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, avx2_rational_3_3_div_u32,
xnn_f32_vlog_ukernel__avx2_rational_3_3_div_u32,
/*init_params=*/nullptr,
benchmark::utils::CheckAVX2)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// FMA3 (u8, u16, u24, u32): benchmark::utils::CheckFMA3 is passed as isa_check.
BENCHMARK_CAPTURE(f32_vlog, fma3_rational_3_3_div_u8,
xnn_f32_vlog_ukernel__fma3_rational_3_3_div_u8,
/*init_params=*/nullptr,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, fma3_rational_3_3_div_u16,
xnn_f32_vlog_ukernel__fma3_rational_3_3_div_u16,
/*init_params=*/nullptr,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, fma3_rational_3_3_div_u24,
xnn_f32_vlog_ukernel__fma3_rational_3_3_div_u24,
/*init_params=*/nullptr,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, fma3_rational_3_3_div_u32,
xnn_f32_vlog_ukernel__fma3_rational_3_3_div_u32,
/*init_params=*/nullptr,
benchmark::utils::CheckFMA3)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
// AVX512F (u16, u32, u48, u64): benchmark::utils::CheckAVX512F is passed as
// isa_check.
BENCHMARK_CAPTURE(f32_vlog, avx512f_rational_3_3_div_u16,
xnn_f32_vlog_ukernel__avx512f_rational_3_3_div_u16,
/*init_params=*/nullptr,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, avx512f_rational_3_3_div_u32,
xnn_f32_vlog_ukernel__avx512f_rational_3_3_div_u32,
/*init_params=*/nullptr,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, avx512f_rational_3_3_div_u48,
xnn_f32_vlog_ukernel__avx512f_rational_3_3_div_u48,
/*init_params=*/nullptr,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, avx512f_rational_3_3_div_u64,
xnn_f32_vlog_ukernel__avx512f_rational_3_3_div_u64,
/*init_params=*/nullptr,
benchmark::utils::CheckAVX512F)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64

// ARM / ARM64 microkernels, compiled only when XNN_ARCH_ARM or XNN_ARCH_ARM64
// is set. Every NEON `rational_3_3_div` variant (u4, u8, u12, u16) passes
// benchmark::utils::CheckNEON as isa_check.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
BENCHMARK_CAPTURE(f32_vlog, neon_rational_3_3_div_u4,
xnn_f32_vlog_ukernel__neon_rational_3_3_div_u4,
/*init_params=*/nullptr,
benchmark::utils::CheckNEON)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, neon_rational_3_3_div_u8,
xnn_f32_vlog_ukernel__neon_rational_3_3_div_u8,
/*init_params=*/nullptr,
benchmark::utils::CheckNEON)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, neon_rational_3_3_div_u12,
xnn_f32_vlog_ukernel__neon_rational_3_3_div_u12,
/*init_params=*/nullptr,
benchmark::utils::CheckNEON)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, neon_rational_3_3_div_u16,
xnn_f32_vlog_ukernel__neon_rational_3_3_div_u16,
/*init_params=*/nullptr,
benchmark::utils::CheckNEON)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64

// WebAssembly SIMD microkernels, compiled only when XNN_ARCH_WASMSIMD or
// XNN_ARCH_WASMRELAXEDSIMD is set. The `wasmsimd_rational_3_3_div` variants
// (u4, u8, u12, u16) are registered without an ISA check function.
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
BENCHMARK_CAPTURE(f32_vlog, wasmsimd_rational_3_3_div_u4,
xnn_f32_vlog_ukernel__wasmsimd_rational_3_3_div_u4,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, wasmsimd_rational_3_3_div_u8,
xnn_f32_vlog_ukernel__wasmsimd_rational_3_3_div_u8,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, wasmsimd_rational_3_3_div_u12,
xnn_f32_vlog_ukernel__wasmsimd_rational_3_3_div_u12,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
BENCHMARK_CAPTURE(f32_vlog, wasmsimd_rational_3_3_div_u16,
xnn_f32_vlog_ukernel__wasmsimd_rational_3_3_div_u16,
/*init_params=*/nullptr)
->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
->UseRealTime();
#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD


// Emit the benchmark program's main() unless the build defines
// XNNPACK_BENCHMARK_NO_MAIN.
// NOTE(review): presumably the opt-out exists so this file can be linked into
// a binary that provides its own main() -- confirm against the build rules.
#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
12 changes: 6 additions & 6 deletions bench/qs8-rsum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
#include "bench/utils.h"
#include <benchmark/benchmark.h>

#include <xnnpack.h>
#include <xnnpack/aligned-allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/reduce.h>
#include <xnnpack/microfnptr.h>
#include <xnnpack/microparams-init.h>
#include "xnnpack.h"
#include "xnnpack/aligned-allocator.h"
#include "xnnpack/common.h"
#include "xnnpack/reduce.h"
#include "xnnpack/microfnptr.h"
#include "xnnpack/microparams-init.h"


BENCHMARK_CAPTURE(qs8_rsum, scalar_u1,
Expand Down
1 change: 1 addition & 0 deletions cmake/gen/avx2_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ SET(ALL_AVX2_MICROKERNEL_SRCS
src/f32-velu/gen/f32-velu-avx2-rr1-p6-u64.c
src/f32-velu/gen/f32-velu-avx2-rr1-p6-u72.c
src/f32-velu/gen/f32-velu-avx2-rr1-p6-u80.c
src/f32-vlog/gen/f32-vlog-avx2-rational-3-3-div.c
src/f32-vscaleexpminusmax/gen/f32-vscaleexpminusmax-avx2-p5-u8.c
src/f32-vscaleexpminusmax/gen/f32-vscaleexpminusmax-avx2-p5-u16.c
src/f32-vscaleexpminusmax/gen/f32-vscaleexpminusmax-avx2-p5-u24.c
Expand Down
1 change: 1 addition & 0 deletions cmake/gen/avx512f_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ SET(ALL_AVX512F_MICROKERNEL_SRCS
src/f32-velu/gen/f32-velu-avx512f-rr1-p6-u128.c
src/f32-vhswish/gen/f32-vhswish-avx512f-u16.c
src/f32-vhswish/gen/f32-vhswish-avx512f-u32.c
src/f32-vlog/gen/f32-vlog-avx512f-rational-3-3-div.c
src/f32-vlrelu/gen/f32-vlrelu-avx512f-u16.c
src/f32-vlrelu/gen/f32-vlrelu-avx512f-u32.c
src/f32-vrelu/gen/f32-vrelu-avx512f-u16.c
Expand Down
1 change: 1 addition & 0 deletions cmake/gen/fma3_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ SET(ALL_FMA3_MICROKERNEL_SRCS
src/f32-qc8w-gemm/gen/f32-qc8w-gemm-8x16-minmax-fma3-broadcast.c
src/f32-vhswish/gen/f32-vhswish-fma3-u8.c
src/f32-vhswish/gen/f32-vhswish-fma3-u16.c
src/f32-vlog/gen/f32-vlog-fma3-rational-3-3-div.c
src/f32-vrsqrt/gen/f32-vrsqrt-fma3-rsqrt-u8.c
src/f32-vrsqrt/gen/f32-vrsqrt-fma3-rsqrt-u16.c
src/f32-vrsqrt/gen/f32-vrsqrt-fma3-rsqrt-u32.c
Expand Down
1 change: 1 addition & 0 deletions cmake/gen/neon_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ SET(ALL_NEON_MICROKERNEL_SRCS
src/f32-vhswish/gen/f32-vhswish-neon-u4.c
src/f32-vhswish/gen/f32-vhswish-neon-u8.c
src/f32-vhswish/gen/f32-vhswish-neon-u16.c
src/f32-vlog/gen/f32-vlog-neon-rational-3-3-div.c
src/f32-vlrelu/gen/f32-vlrelu-neon-u4.c
src/f32-vlrelu/gen/f32-vlrelu-neon-u8.c
src/f32-vmulcaddc/gen/f32-vmulcaddc-c4-minmax-neon-2x.c
Expand Down
1 change: 1 addition & 0 deletions cmake/gen/scalar_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,7 @@ SET(ALL_SCALAR_MICROKERNEL_SRCS
src/f32-vhswish/gen/f32-vhswish-scalar-u2.c
src/f32-vhswish/gen/f32-vhswish-scalar-u4.c
src/f32-vlog/gen/f32-vlog-scalar-log.c
src/f32-vlog/gen/f32-vlog-scalar-rational-3-3-div.c
src/f32-vlrelu/gen/f32-vlrelu-scalar-u1.c
src/f32-vlrelu/gen/f32-vlrelu-scalar-u2.c
src/f32-vlrelu/gen/f32-vlrelu-scalar-u4.c
Expand Down
1 change: 1 addition & 0 deletions cmake/gen/sse2_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ SET(ALL_SSE2_MICROKERNEL_SRCS
src/f32-velu/gen/f32-velu-sse2-rr2-p6-u16.c
src/f32-velu/gen/f32-velu-sse2-rr2-p6-u20.c
src/f32-velu/gen/f32-velu-sse2-rr2-p6-u24.c
src/f32-vlog/gen/f32-vlog-sse2-rational-3-3-div.c
src/f32-vlrelu/gen/f32-vlrelu-sse2-u4.c
src/f32-vlrelu/gen/f32-vlrelu-sse2-u8.c
src/f32-vrnd/gen/f32-vrndd-sse2-u4.c
Expand Down
1 change: 1 addition & 0 deletions cmake/gen/wasmsimd_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,7 @@ SET(ALL_WASMSIMD_MICROKERNEL_SRCS
src/f32-vhswish/gen/f32-vhswish-wasmsimd-u4.c
src/f32-vhswish/gen/f32-vhswish-wasmsimd-u8.c
src/f32-vhswish/gen/f32-vhswish-wasmsimd-u16.c
src/f32-vlog/gen/f32-vlog-wasmsimd-rational-3-3-div.c
src/f32-vlrelu/gen/f32-vlrelu-wasmsimd-iminmax-u4.c
src/f32-vlrelu/gen/f32-vlrelu-wasmsimd-iminmax-u8.c
src/f32-vlrelu/gen/f32-vlrelu-wasmsimd-laneselect-u4.c
Expand Down
1 change: 1 addition & 0 deletions gen/avx2_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ ALL_AVX2_MICROKERNEL_SRCS = [
"src/f32-velu/gen/f32-velu-avx2-rr1-p6-u64.c",
"src/f32-velu/gen/f32-velu-avx2-rr1-p6-u72.c",
"src/f32-velu/gen/f32-velu-avx2-rr1-p6-u80.c",
"src/f32-vlog/gen/f32-vlog-avx2-rational-3-3-div.c",
"src/f32-vscaleexpminusmax/gen/f32-vscaleexpminusmax-avx2-p5-u8.c",
"src/f32-vscaleexpminusmax/gen/f32-vscaleexpminusmax-avx2-p5-u16.c",
"src/f32-vscaleexpminusmax/gen/f32-vscaleexpminusmax-avx2-p5-u24.c",
Expand Down
1 change: 1 addition & 0 deletions gen/avx512f_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ ALL_AVX512F_MICROKERNEL_SRCS = [
"src/f32-velu/gen/f32-velu-avx512f-rr1-p6-u128.c",
"src/f32-vhswish/gen/f32-vhswish-avx512f-u16.c",
"src/f32-vhswish/gen/f32-vhswish-avx512f-u32.c",
"src/f32-vlog/gen/f32-vlog-avx512f-rational-3-3-div.c",
"src/f32-vlrelu/gen/f32-vlrelu-avx512f-u16.c",
"src/f32-vlrelu/gen/f32-vlrelu-avx512f-u32.c",
"src/f32-vrelu/gen/f32-vrelu-avx512f-u16.c",
Expand Down
1 change: 1 addition & 0 deletions gen/fma3_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ ALL_FMA3_MICROKERNEL_SRCS = [
"src/f32-qc8w-gemm/gen/f32-qc8w-gemm-8x16-minmax-fma3-broadcast.c",
"src/f32-vhswish/gen/f32-vhswish-fma3-u8.c",
"src/f32-vhswish/gen/f32-vhswish-fma3-u16.c",
"src/f32-vlog/gen/f32-vlog-fma3-rational-3-3-div.c",
"src/f32-vrsqrt/gen/f32-vrsqrt-fma3-rsqrt-u8.c",
"src/f32-vrsqrt/gen/f32-vrsqrt-fma3-rsqrt-u16.c",
"src/f32-vrsqrt/gen/f32-vrsqrt-fma3-rsqrt-u32.c",
Expand Down
1 change: 1 addition & 0 deletions gen/neon_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ ALL_NEON_MICROKERNEL_SRCS = [
"src/f32-vhswish/gen/f32-vhswish-neon-u4.c",
"src/f32-vhswish/gen/f32-vhswish-neon-u8.c",
"src/f32-vhswish/gen/f32-vhswish-neon-u16.c",
"src/f32-vlog/gen/f32-vlog-neon-rational-3-3-div.c",
"src/f32-vlrelu/gen/f32-vlrelu-neon-u4.c",
"src/f32-vlrelu/gen/f32-vlrelu-neon-u8.c",
"src/f32-vmulcaddc/gen/f32-vmulcaddc-c4-minmax-neon-2x.c",
Expand Down
1 change: 1 addition & 0 deletions gen/scalar_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,7 @@ ALL_SCALAR_MICROKERNEL_SRCS = [
"src/f32-vhswish/gen/f32-vhswish-scalar-u2.c",
"src/f32-vhswish/gen/f32-vhswish-scalar-u4.c",
"src/f32-vlog/gen/f32-vlog-scalar-log.c",
"src/f32-vlog/gen/f32-vlog-scalar-rational-3-3-div.c",
"src/f32-vlrelu/gen/f32-vlrelu-scalar-u1.c",
"src/f32-vlrelu/gen/f32-vlrelu-scalar-u2.c",
"src/f32-vlrelu/gen/f32-vlrelu-scalar-u4.c",
Expand Down
1 change: 1 addition & 0 deletions gen/sse2_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ ALL_SSE2_MICROKERNEL_SRCS = [
"src/f32-velu/gen/f32-velu-sse2-rr2-p6-u16.c",
"src/f32-velu/gen/f32-velu-sse2-rr2-p6-u20.c",
"src/f32-velu/gen/f32-velu-sse2-rr2-p6-u24.c",
"src/f32-vlog/gen/f32-vlog-sse2-rational-3-3-div.c",
"src/f32-vlrelu/gen/f32-vlrelu-sse2-u4.c",
"src/f32-vlrelu/gen/f32-vlrelu-sse2-u8.c",
"src/f32-vrnd/gen/f32-vrndd-sse2-u4.c",
Expand Down
1 change: 1 addition & 0 deletions gen/wasmsimd_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ ALL_WASMSIMD_MICROKERNEL_SRCS = [
"src/f32-vhswish/gen/f32-vhswish-wasmsimd-u4.c",
"src/f32-vhswish/gen/f32-vhswish-wasmsimd-u8.c",
"src/f32-vhswish/gen/f32-vhswish-wasmsimd-u16.c",
"src/f32-vlog/gen/f32-vlog-wasmsimd-rational-3-3-div.c",
"src/f32-vlrelu/gen/f32-vlrelu-wasmsimd-iminmax-u4.c",
"src/f32-vlrelu/gen/f32-vlrelu-wasmsimd-iminmax-u8.c",
"src/f32-vlrelu/gen/f32-vlrelu-wasmsimd-laneselect-u4.c",
Expand Down
1 change: 1 addition & 0 deletions scripts/generate-benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ tools/generate-vunary-benchmark.py --spec test/f32-vsigmoid.yaml --output bench/
tools/generate-vunary-benchmark.py --spec test/f32-vsqr.yaml --output bench/f32-vsqr.cc &
tools/generate-vunary-benchmark.py --spec test/f32-vsqrt.yaml --output bench/f32-vsqrt.cc &
tools/generate-vunary-benchmark.py --spec test/f32-vtanh.yaml --output bench/f32-vtanh.cc &
tools/generate-vunary-benchmark.py --spec test/f32-vlog.yaml --output bench/f32-vlog.cc &

### Tests for VLRelu micro-kernels
tools/generate-vunary-benchmark.py --spec test/f16-vlrelu.yaml --output bench/f16-vlrelu.cc &
Expand Down
9 changes: 9 additions & 0 deletions scripts/generate-f32-vlog.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,13 @@
#################################### Scalar ###################################
# Kernels generated from the dedicated scalar template scalar-log.c.in.
tools/xngen src/f32-vlog/scalar-log.c.in -D BATCH_TILES=1,2,4 -o src/f32-vlog/gen/f32-vlog-scalar-log.c &

##################################### SIMD #####################################
# Every line below instantiates the shared rational-3-3.c.in template for one
# ARCH and writes a single output file per ARCH. ARCH=scalar is listed under
# this heading because it is generated from the same template as the vector
# ISAs. The BATCH_TILES values correspond to the _uN suffixes of the generated
# kernel names (e.g. sse2 4,8,12,16 -> ..._sse2_rational_3_3_div_u4 ... _u16).
# NOTE(review): -D DIV=DIV presumably selects the division-based variant that
# gives the outputs their "-div" suffix -- confirm against the template.
tools/xngen src/f32-vlog/rational-3-3.c.in -D ARCH=scalar -D BATCH_TILES=1,2,4,8 -D DIV=DIV -o src/f32-vlog/gen/f32-vlog-scalar-rational-3-3-div.c &
tools/xngen src/f32-vlog/rational-3-3.c.in -D ARCH=sse2 -D BATCH_TILES=4,8,12,16 -D DIV=DIV -o src/f32-vlog/gen/f32-vlog-sse2-rational-3-3-div.c &
tools/xngen src/f32-vlog/rational-3-3.c.in -D ARCH=avx2 -D BATCH_TILES=8,16,24,32 -D DIV=DIV -o src/f32-vlog/gen/f32-vlog-avx2-rational-3-3-div.c &
tools/xngen src/f32-vlog/rational-3-3.c.in -D ARCH=fma3 -D BATCH_TILES=8,16,24,32 -D DIV=DIV -o src/f32-vlog/gen/f32-vlog-fma3-rational-3-3-div.c &
tools/xngen src/f32-vlog/rational-3-3.c.in -D ARCH=avx512f -D BATCH_TILES=16,32,48,64 -D DIV=DIV -o src/f32-vlog/gen/f32-vlog-avx512f-rational-3-3-div.c &
tools/xngen src/f32-vlog/rational-3-3.c.in -D ARCH=neon -D BATCH_TILES=4,8,12,16 -D DIV=DIV -o src/f32-vlog/gen/f32-vlog-neon-rational-3-3-div.c &
tools/xngen src/f32-vlog/rational-3-3.c.in -D ARCH=wasmsimd -D BATCH_TILES=4,8,12,16 -D DIV=DIV -o src/f32-vlog/gen/f32-vlog-wasmsimd-rational-3-3-div.c &

# All generator invocations above run as background jobs; block until they finish.
wait
Loading

0 comments on commit 00ddbe4

Please sign in to comment.