Skip to content
This repository has been archived by the owner on Oct 1, 2020. It is now read-only.

Commit

Permalink
Q8ADD operator, micro-kernels, tests, and benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
Marat Dukhan committed Nov 16, 2018
1 parent ee631d0 commit 08a3b6d
Show file tree
Hide file tree
Showing 17 changed files with 2,390 additions and 3 deletions.
32 changes: 30 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ SET(QNNPACK_OPERATOR_SRCS
src/operator-run.c
src/convolution.c
src/deconvolution.c
src/fully-connected.c)
src/fully-connected.c
src/add.c)

SET(QNNPACK_PSIMD_UKERNELS
src/sgemm/6x8-psimd.c)
Expand All @@ -149,6 +150,7 @@ SET(QNNPACK_ARM_NEON_UKERNELS
src/q8conv/8x8-neon.c
src/q8updw/9c8-neon.c
src/q8mpdw/25c8-neon.c
src/q8add/neon.c
src/sgemm/5x8-neon.c
src/sgemm/6x8-neon.c)

Expand All @@ -168,7 +170,8 @@ SET(QNNPACK_X86_SSE2_UKERNELS
src/q8gemm/4x4c2-sse2.c
src/q8conv/4x4c2-sse2.c
src/q8mpdw/25c8-sse2.c
src/q8updw/9c8-sse2.c)
src/q8updw/9c8-sse2.c
src/q8add/sse2.c)

SET(QNNPACK_UKERNELS ${QNNPACK_PSIMD_UKERNELS})
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv[5-8]" OR IOS_ARCH MATCHES "^armv7")
Expand Down Expand Up @@ -329,6 +332,15 @@ IF(QNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(fully-connected-test PRIVATE qnnpack cpuinfo gtest gtest_main)
ADD_TEST(fully-connected-test fully-connected-test)

# Build test/add.cc as the add-test executable (C++11, compiler extensions
# disabled), link it against qnnpack, cpuinfo and GoogleTest, and register
# it with CTest under the same name.
ADD_EXECUTABLE(add-test test/add.cc)
SET_TARGET_PROPERTIES(add-test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS NO)
TARGET_INCLUDE_DIRECTORIES(add-test PRIVATE src test)
TARGET_LINK_LIBRARIES(add-test PRIVATE qnnpack cpuinfo gtest gtest_main)
ADD_TEST(add-test add-test)

# ---[ Build unit tests for micro-kernels
ADD_EXECUTABLE(q8gemm-test test/q8gemm.cc)
SET_TARGET_PROPERTIES(q8gemm-test PROPERTIES
Expand Down Expand Up @@ -366,6 +378,15 @@ IF(QNNPACK_BUILD_TESTS)
TARGET_LINK_LIBRARIES(q8mpdw-test PRIVATE qnnpack cpuinfo fp16 gtest gtest_main)
ADD_TEST(q8mpdw-test q8mpdw-test)

# Build test/q8uvadd.cc as the q8uvadd-test executable (C++11, compiler
# extensions disabled) and register it with CTest. Unlike add-test, this
# target also links fp16, matching the other micro-kernel tests around it.
ADD_EXECUTABLE(q8uvadd-test test/q8uvadd.cc)
SET_TARGET_PROPERTIES(q8uvadd-test PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS NO)
TARGET_INCLUDE_DIRECTORIES(q8uvadd-test PRIVATE src test)
TARGET_LINK_LIBRARIES(q8uvadd-test PRIVATE qnnpack cpuinfo fp16 gtest gtest_main)
ADD_TEST(q8uvadd-test q8uvadd-test)

ADD_EXECUTABLE(hgemm-test test/hgemm.cc)
SET_TARGET_PROPERTIES(hgemm-test PROPERTIES
CXX_STANDARD 11
Expand Down Expand Up @@ -402,6 +423,13 @@ IF(QNNPACK_BUILD_BENCHMARKS)
CXX_EXTENSIONS NO)
TARGET_LINK_LIBRARIES(convolution-bench PRIVATE qnnpack benchmark)

# Build bench/add.cc as the add-bench executable (C++11, compiler extensions
# disabled), linked against qnnpack and Google Benchmark. It is not
# registered with CTest; it is meant to be run by hand.
ADD_EXECUTABLE(add-bench bench/add.cc)
SET_TARGET_PROPERTIES(add-bench PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
CXX_EXTENSIONS NO)
TARGET_LINK_LIBRARIES(add-bench PRIVATE qnnpack benchmark)

ADD_EXECUTABLE(q8gemm-bench bench/q8gemm.cc)
SET_TARGET_PROPERTIES(q8gemm-bench PROPERTIES
CXX_STANDARD 11
Expand Down
151 changes: 151 additions & 0 deletions bench/add.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <algorithm>
#include <cmath>
#include <functional>
#include <random>
#include <vector>

#include <qnnpack.h>

#include <benchmark/benchmark.h>


/*
 * Benchmarks the Q8 Add operator with separate input and output buffers.
 *
 * state.range(0) is the batch size (N) and state.range(1) the number of
 * channels (C). All three buffers hold N * C bytes and are passed with a
 * stride equal to the channel count. Both inputs are filled with random
 * bytes before the operator is set up.
 *
 * Any QNNPACK failure is reported through state.SkipWithError(). Because
 * SkipWithError() does not leave the current scope by itself, every error
 * path returns (or breaks out of the timing loop) explicitly so that a
 * failed or null operator is never used afterwards.
 */
static void add_nc_q8(benchmark::State& state) {
  const size_t batchSize = static_cast<size_t>(state.range(0));
  const size_t channels = static_cast<size_t>(state.range(1));

  std::random_device randomDevice;
  auto rng = std::mt19937(randomDevice());
  // std::uniform_int_distribution is not specified for 8-bit integer types,
  // so draw from [0, 255] as unsigned int and narrow the result.
  std::uniform_int_distribution<unsigned int> byteDistribution(0, 255);
  auto u8rng = [&byteDistribution, &rng]() {
    return static_cast<uint8_t>(byteDistribution(rng));
  };

  std::vector<uint8_t> a(batchSize * channels);
  std::vector<uint8_t> b(batchSize * channels);
  std::vector<uint8_t> y(batchSize * channels);
  std::generate(a.begin(), a.end(), u8rng);
  std::generate(b.begin(), b.end(), u8rng);

  qnnp_status status = qnnp_initialize();
  if (status != qnnp_status_success) {
    state.SkipWithError("failed to initialize QNNPACK");
    return;
  }

  qnnp_operator_t addOperator = nullptr;
  status = qnnp_create_add_nc_q8(
    channels,
    127 /* a:zero point */, 1.0f /* a:scale */,
    127 /* b:zero point */, 1.0f /* b:scale */,
    127 /* y:zero point */, 1.0f /* y:scale */,
    1 /* y:min */, 254 /* y:max */,
    &addOperator);
  if (status != qnnp_status_success || addOperator == nullptr) {
    state.SkipWithError("failed to create Q8 Add operator");
    return;
  }

  status = qnnp_setup_add_nc_q8(
    addOperator,
    batchSize,
    a.data(), channels /* a:stride */,
    b.data(), channels /* b:stride */,
    y.data(), channels /* y:stride */);
  if (status != qnnp_status_success) {
    state.SkipWithError("failed to setup Q8 Add operator");
    // The operator was created successfully; release it before bailing out.
    qnnp_delete_operator(addOperator);
    return;
  }

  for (auto _ : state) {
    status = qnnp_run_operator(addOperator, nullptr /* thread pool */);
    if (status != qnnp_status_success) {
      state.SkipWithError("failed to run Q8 Add operator");
      break;
    }
  }

  const size_t itemsPerIteration = batchSize * channels;
  state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(itemsPerIteration));

  // Two input bytes read and one output byte written per item.
  const size_t bytesPerIteration = 3 * itemsPerIteration * sizeof(uint8_t);
  state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(bytesPerIteration));

  status = qnnp_delete_operator(addOperator);
  if (status != qnnp_status_success) {
    state.SkipWithError("failed to delete Q8 Add operator");
  }
}

/*
 * Benchmarks the Q8 Add operator in place: the buffer y is passed both as
 * the second input and as the output, so the sum overwrites one operand.
 *
 * state.range(0) is the batch size (N) and state.range(1) the number of
 * channels (C). Both buffers hold N * C bytes, are passed with a stride
 * equal to the channel count, and are filled with random bytes before the
 * operator is set up.
 *
 * Any QNNPACK failure is reported through state.SkipWithError(). Because
 * SkipWithError() does not leave the current scope by itself, every error
 * path returns (or breaks out of the timing loop) explicitly so that a
 * failed or null operator is never used afterwards.
 */
static void add_nc_q8_inplace(benchmark::State& state) {
  const size_t batchSize = static_cast<size_t>(state.range(0));
  const size_t channels = static_cast<size_t>(state.range(1));

  std::random_device randomDevice;
  auto rng = std::mt19937(randomDevice());
  // std::uniform_int_distribution is not specified for 8-bit integer types,
  // so draw from [0, 255] as unsigned int and narrow the result.
  std::uniform_int_distribution<unsigned int> byteDistribution(0, 255);
  auto u8rng = [&byteDistribution, &rng]() {
    return static_cast<uint8_t>(byteDistribution(rng));
  };

  std::vector<uint8_t> a(batchSize * channels);
  std::vector<uint8_t> y(batchSize * channels);
  std::generate(a.begin(), a.end(), u8rng);
  std::generate(y.begin(), y.end(), u8rng);

  qnnp_status status = qnnp_initialize();
  if (status != qnnp_status_success) {
    state.SkipWithError("failed to initialize QNNPACK");
    return;
  }

  qnnp_operator_t addOperator = nullptr;
  status = qnnp_create_add_nc_q8(
    channels,
    127 /* a:zero point */, 1.0f /* a:scale */,
    127 /* b:zero point */, 1.0f /* b:scale */,
    127 /* y:zero point */, 1.0f /* y:scale */,
    1 /* y:min */, 254 /* y:max */,
    &addOperator);
  if (status != qnnp_status_success || addOperator == nullptr) {
    state.SkipWithError("failed to create Q8 Add operator");
    return;
  }

  status = qnnp_setup_add_nc_q8(
    addOperator,
    batchSize,
    a.data(), channels /* a:stride */,
    y.data(), channels /* b:stride */,
    y.data(), channels /* y:stride */);
  if (status != qnnp_status_success) {
    state.SkipWithError("failed to setup Q8 Add operator");
    // The operator was created successfully; release it before bailing out.
    qnnp_delete_operator(addOperator);
    return;
  }

  for (auto _ : state) {
    status = qnnp_run_operator(addOperator, nullptr /* thread pool */);
    if (status != qnnp_status_success) {
      state.SkipWithError("failed to run Q8 Add operator");
      break;
    }
  }

  const size_t itemsPerIteration = batchSize * channels;
  state.SetItemsProcessed(int64_t(state.iterations()) * int64_t(itemsPerIteration));

  // One byte read from a, one read from y and one written back to y per item.
  const size_t bytesPerIteration = 3 * itemsPerIteration * sizeof(uint8_t);
  state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(bytesPerIteration));

  status = qnnp_delete_operator(addOperator);
  if (status != qnnp_status_success) {
    state.SkipWithError("failed to delete Q8 Add operator");
  }
}

/*
 * Registers the (N, C) argument pairs swept by the Add benchmarks.
 *
 * N is side * side for side = 224, 112, 56, 28, 14, 7, while C starts at
 * 16 and doubles at every step (16, 32, ..., 512).
 */
static void CharacteristicArguments(benchmark::internal::Benchmark* b)
{
  b->ArgNames({"N", "C"});

  uint32_t side = 224;
  uint32_t channelCount = 16;
  while (side >= 7) {
    b->Args({side * side, channelCount});
    // Halve the side length and double the channel count for the next pair.
    side >>= 1;
    channelCount <<= 1;
  }
}

// Register both Add benchmarks; CharacteristicArguments supplies the (N, C)
// argument pairs each one is run with.
BENCHMARK(add_nc_q8)->Apply(CharacteristicArguments);
BENCHMARK(add_nc_q8_inplace)->Apply(CharacteristicArguments);

// BENCHMARK_MAIN() is skipped when QNNPACK_BENCHMARK_NO_MAIN is defined --
// presumably so this file can be linked into a binary that provides its own
// entry point (confirm against the build configuration).
#ifndef QNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif
6 changes: 6 additions & 0 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def main(args):
build.cc("convolution.c"),
build.cc("deconvolution.c"),
build.cc("fully-connected.c"),
build.cc("add.c"),
]

with build.options(isa=arm.neon if build.target.is_arm else None):
Expand All @@ -78,6 +79,7 @@ def main(args):
with build.options(isa=arm.neon if build.target.is_arm else None):
if build.target.is_arm or build.target.is_arm64:
qnnpack_objects += [
build.cc("q8add/neon.c"),
build.cc("q8gemm/4x8-neon.c"),
build.cc("q8gemm/4x-sumrows-neon.c"),
build.cc("q8gemm/4x8c2-xzp-neon.c"),
Expand Down Expand Up @@ -106,6 +108,7 @@ def main(args):
if build.target.is_x86 or build.target.is_x86_64:
with build.options(isa=x86.sse2):
qnnpack_objects += [
build.cc("q8add/sse2.c"),
build.cc("q8gemm/2x4c8-sse2.c"),
build.cc("q8gemm/4x4c2-sse2.c"),
build.cc("q8conv/4x4c2-sse2.c"),
Expand All @@ -125,8 +128,10 @@ def main(args):
build.unittest("q8conv-test", build.cxx("q8conv.cc"))
build.unittest("q8updw-test", build.cxx("q8updw.cc"))
build.unittest("q8mpdw-test", build.cxx("q8mpdw.cc"))
build.unittest("q8uvadd-test", build.cxx("q8uvadd.cc"))
build.unittest("hgemm-test", build.cxx("hgemm.cc"))
build.unittest("sgemm-test", build.cxx("sgemm.cc"))
build.unittest("add-test", build.cxx("add.cc"))
build.unittest("convolution-test", build.cxx("convolution.cc"))
build.unittest("deconvolution-test", build.cxx("deconvolution.cc"))
build.unittest("fully-connected-test", build.cxx("fully-connected.cc"))
Expand All @@ -144,6 +149,7 @@ def main(args):
isa=benchmark_isa,
extra_include_dirs="src"):

build.benchmark("add-bench", build.cxx("add.cc"))
build.benchmark("convolution-bench", build.cxx("convolution.cc"))
build.benchmark("q8gemm-bench", build.cxx("q8gemm.cc"))
build.benchmark("hgemm-bench", build.cxx("hgemm.cc"))
Expand Down
22 changes: 22 additions & 0 deletions include/qnnpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,28 @@ enum qnnp_status qnnp_setup_fully_connected_nc_q8(
size_t output_stride,
pthreadpool_t threadpool);

/*
 * Creates a Q8 Add operator that sums two quantized 8-bit inputs (a and b).
 *
 * channels                  - number of channels per batch element.
 * a_zero_point, a_scale     - quantization parameters of input a.
 * b_zero_point, b_scale     - quantization parameters of input b.
 * sum_zero_point, sum_scale - quantization parameters of the output.
 * sum_min, sum_max          - bounds on the output values (presumably an
 *                             inclusive clamp on the quantized result;
 *                             confirm against src/add.c).
 * add                       - receives the created operator handle.
 *
 * Returns a qnnp_status code; callers compare it with qnnp_status_success.
 */
enum qnnp_status qnnp_create_add_nc_q8(
size_t channels,
uint8_t a_zero_point,
float a_scale,
uint8_t b_zero_point,
float b_scale,
uint8_t sum_zero_point,
float sum_scale,
uint8_t sum_min,
uint8_t sum_max,
qnnp_operator_t* add);

/*
 * Binds input and output buffers to a Q8 Add operator before it is run with
 * qnnp_run_operator().
 *
 * add             - operator handle obtained from qnnp_create_add_nc_q8().
 * batch_size      - number of batch elements to process.
 * a, a_stride     - first input buffer and its stride.
 * b, b_stride     - second input buffer and its stride.
 * sum, sum_stride - output buffer and its stride.
 *
 * NOTE(review): strides look like the distance between consecutive batch
 * elements, counted in uint8_t elements (the benchmark passes the channel
 * count for densely packed data) -- confirm against src/add.c. The
 * benchmark also passes the same buffer as both b and sum.
 *
 * Returns a qnnp_status code; callers compare it with qnnp_status_success.
 */
enum qnnp_status qnnp_setup_add_nc_q8(
qnnp_operator_t add,
size_t batch_size,
const uint8_t* a,
size_t a_stride,
const uint8_t* b,
size_t b_stride,
uint8_t* sum,
size_t sum_stride);

enum qnnp_status qnnp_run_operator(
qnnp_operator_t op,
pthreadpool_t threadpool);
Expand Down
Loading

0 comments on commit 08a3b6d

Please sign in to comment.