Skip to content

Commit

Permalink
[vulkan] adaptive_avg_pool2d (pytorch#41220)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: pytorch#41220

Test Plan: Imported from OSS

Reviewed By: AshkanAliabadi

Differential Revision: D22754943

Pulled By: IvanKobzarev

fbshipit-source-id: 91a94f32db005ebb693384f4d27efe66e2c33a14
  • Loading branch information
IvanKobzarev authored and facebook-github-bot committed Jul 28, 2020
1 parent 0a09601 commit 4f72382
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 0 deletions.
8 changes: 8 additions & 0 deletions aten/src/ATen/native/AdaptiveAveragePooling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
#include <ATen/NativeFunctions.h>
#include <ATen/Parallel.h>
#include <tuple>
#ifdef USE_VULKAN
#include <ATen/native/vulkan/VulkanAten.h>
#endif


namespace at {
Expand Down Expand Up @@ -325,6 +328,11 @@ namespace {
if (input.is_mkldnn()) {
return at::mkldnn_adaptive_avg_pool2d(input, output_size);
}
#ifdef USE_VULKAN
if (input.is_vulkan()) {
return at::native::vulkan_adaptive_avg_pool2d(input, output_size);
}
#endif

// TODO: fastpath for Channels_last should be explored later;
if (input.suggest_memory_format() == at::MemoryFormat::Contiguous && !input.is_quantized() && output_size[0] == 1 && output_size[1] == 1) {
Expand Down
22 changes: 22 additions & 0 deletions aten/src/ATen/native/vulkan/VulkanAten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,28 @@ at::Tensor upsample_nearest2d_vulkan(
return output;
}

// Adaptive 2-D average pooling for Vulkan tensors.
//
// input:      Vulkan tensor; must be 4-dimensional. Its sizes are read as
//             {in, ic, ih, iw}.
// outputSize: exactly two elements, {oh, ow}.
//
// Returns a newly allocated Vulkan tensor of sizes {in, ic, oh, ow} that is
// filled by vulkan::detail::adaptive_avg_pool2d.
at::Tensor vulkan_adaptive_avg_pool2d(
    const at::Tensor& input,
    IntArrayRef outputSize) {
  TORCH_INTERNAL_ASSERT(
      input.dim() == 4,
      "vulkan_adaptive_avg_pool2d expects 4-dimensional input");
  // outputSize[0] and outputSize[1] are read below; reject any other length
  // up front instead of indexing past the end of the array.
  TORCH_INTERNAL_ASSERT(
      outputSize.size() == 2,
      "vulkan_adaptive_avg_pool2d expects output_size with 2 elements");

  auto& x = vtensor_from_vulkan(input);
  auto inputSize = input.sizes();
  auto in = inputSize[0];
  auto ic = inputSize[1];
  auto ih = inputSize[2];
  auto iw = inputSize[3];

  auto oh = outputSize[0];
  auto ow = outputSize[1];

  Tensor output = empty_vulkan({in, ic, oh, ow}, input.options(), {});
  VulkanTensor& y = vtensor_from_vulkan(output);
  // Storage is allocated explicitly before the op is handed the output.
  y.allocate_storage();
  vulkan::detail::adaptive_avg_pool2d(y, x, ih, iw, oh, ow, in, ic);
  return output;
}

Tensor vulkan_add(const Tensor& self, const Tensor& other, Scalar alpha) {
VulkanTensor& x = vtensor_from_vulkan(self);
VulkanTensor& y = vtensor_from_vulkan(other);
Expand Down
4 changes: 4 additions & 0 deletions aten/src/ATen/native/vulkan/VulkanAten.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,9 @@ at::Tensor vulkan_convolution_prepacked(
const float output_min,
const float output_max);

// Adaptive 2-D average pooling on a Vulkan tensor.
// `input` must be 4-dimensional (the implementation asserts this);
// `output_size` supplies the output height and width as its first two
// elements. Returns a new tensor whose first two sizes are taken from `input`
// and whose last two are taken from `output_size`.
at::Tensor vulkan_adaptive_avg_pool2d(
const at::Tensor& input,
IntArrayRef output_size);

} // namespace native
} // namespace at
50 changes: 50 additions & 0 deletions aten/src/ATen/native/vulkan/VulkanOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,56 @@ void upsample_nearest2d(
vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
}

// Runs the adaptive_avg_pool2d compute shader, reading `input` and writing
// `output`.
//
// IH, IW: input height / width passed to the shader.
// OH, OW: output height / width passed to the shader; also the x/y dispatch
//         extents.
// IN, IC: their product is the z dispatch extent.
//
// The call blocks until the submitted command buffer has finished.
void adaptive_avg_pool2d(
    VulkanTensor& output,
    const VulkanTensor& input,
    const int64_t IH,
    const int64_t IW,
    const int64_t OH,
    const int64_t OW,
    const int64_t IN,
    const int64_t IC) {
  auto device = context().device();
  int64_t C = IN * IC;

  // Mirrors the shader's uniform block: four 32-bit ints in this order.
  struct ConstBlock {
    int32_t IW;
    int32_t IH;
    int32_t OW;
    int32_t OH;
  };
  // The shader block holds 32-bit ints, so the 64-bit sizes are converted
  // explicitly; an implicit int64_t -> int32_t conversion inside a braced
  // initializer is a narrowing conversion and is ill-formed.
  ConstBlock cb{
      static_cast<int32_t>(IW),
      static_cast<int32_t>(IH),
      static_cast<int32_t>(OW),
      static_cast<int32_t>(OH)};
  VBuffer constBuffer =
      makeUniformConstBuffer(static_cast<void*>(&cb), sizeof(cb));

  VkDescriptorSetLayout descriptorSetLayout{};
  VkDescriptorPool descriptorPool{};
  VkDescriptorSet descriptorSet{};
  // Descriptor order matches the bindings used below:
  // 0 = output image, 1 = input sampler, 2 = uniform constants.
  std::vector<VkDescriptorType> descriptorTypes{
      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
      VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER};
  createDescriptorSetLayoutSinglePool(
      device,
      descriptorTypes,
      &descriptorSetLayout,
      &descriptorPool,
      &descriptorSet);

  output.image()->bindStorageImage(descriptorSet, 0);
  input.image()->bindShaderRead(descriptorSet, 1);
  constBuffer.bind(descriptorSet, 2);

  WorkGroupSize workGroupSize{8, 8, 1};
  auto& computeUnit = context().computeUnitFactory().get(
      GLSL_SPV(adaptive_avg_pool2d), descriptorSetLayout, workGroupSize);
  computeUnit.createCommandBuffer(descriptorSet);
  // Barrier is recorded before the dispatch so the input image is in a
  // shader-readable state when the shader samples it.
  input.image()->addImageMemoryBarrierToShaderRead(computeUnit.commandBuffer());
  computeUnit.dispatchCommandBuffer(OW, OH, C, workGroupSize);
  computeUnit.endCommandBuffer();
  computeUnit.submitAndWaitCommandBuffer();

  // The pool and layout were created for this call only; release them now
  // that the work has completed.
  vkDestroyDescriptorPool(device, descriptorPool, nullptr);
  vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
}

void add(
VulkanTensor& output,
const VulkanTensor& input0,
Expand Down
10 changes: 10 additions & 0 deletions aten/src/ATen/native/vulkan/VulkanOps.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ void upsample_nearest2d(
float scaleH,
float scaleW);

// Adaptive 2-D average pooling: reads `input` and writes `output`.
// IH/IW and OH/OW are the input and output height/width handed to the
// compute shader; IN and IC are multiplied together by the implementation to
// form the third dispatch extent.
void adaptive_avg_pool2d(
VulkanTensor& output,
const VulkanTensor& input,
const int64_t IH,
const int64_t IW,
const int64_t OH,
const int64_t OW,
const int64_t IN,
const int64_t IC);

void add(
VulkanTensor& output,
const VulkanTensor& input0,
Expand Down
41 changes: 41 additions & 0 deletions aten/src/ATen/native/vulkan/glsl/adaptive_avg_pool2d.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#version 450 core
// Default std430 layout for buffer and uniform blocks declared below.
layout(std430) buffer;
layout(std430) uniform;
// Binding 0: write-only rgba16f 3-D image that main() stores results into.
layout(set = 0, rgba16f, binding = 0) writeonly highp uniform image3D uOutput;
// Binding 1: 3-D sampler that main() fetches input texels from.
layout(set = 0, binding = 2 - 1) uniform highp sampler3D uInput;
// Binding 2: four ints used by main() as the input extent (IW, IH) and the
// output extent (OW, OH). Field order must match the host-side struct that
// fills this block.
layout(set = 0, binding = 2) uniform constBlock {
int IW;
int IH;
int OW;
int OH;
}
uConstBlock;

// Work-group dimensions come from specialization constants 1, 2 and 3
// (presumably supplied by the host when the pipeline is created).
layout(local_size_x_id = 1, local_size_y_id = 2, local_size_z_id = 3) in;

// One invocation per output texel (pos.x, pos.y) of slice pos.z.
// Averages the input window [sx, ex) x [sy, ey) of the same slice and stores
// the result at pos. Invocations outside the OW x OH extent do nothing.
void main() {
  ivec3 pos = ivec3(gl_GlobalInvocationID);
  int ow = uConstBlock.OW;
  int oh = uConstBlock.OH;
  if (pos.x < ow && pos.y < oh) {
    int iw = uConstBlock.IW;
    int ih = uConstBlock.IH;

    // Window bounds are floor(p * in / out) and ceil((p + 1) * in / out).
    // They are computed with integer arithmetic because float division on
    // the GPU is not guaranteed to be correctly rounded, so an exactly
    // integral quotient could land just off the integer and move the window
    // by one texel. All operands are non-negative and ow/oh are positive
    // here (guarded by the test above), so truncating division is floor and
    // (a + b - 1) / b is ceil.
    int sx = (pos.x * iw) / ow;
    int sy = (pos.y * ih) / oh;
    int ex = ((pos.x + 1) * iw + ow - 1) / ow;
    int ey = ((pos.y + 1) * ih + oh - 1) / oh;

    // Reciprocal of the window area, applied once after accumulation.
    vec4 r = vec4(1.0) / float(ex - sx) / float(ey - sy);
    vec4 acc = vec4(0);

    for (int xi = sx; xi < ex; ++xi) {
      for (int yi = sy; yi < ey; ++yi) {
        acc += texelFetch(uInput, ivec3(xi, yi, pos.z), 0);
      }
    }

    imageStore(uOutput, pos, r * acc);
  }
}
40 changes: 40 additions & 0 deletions aten/src/ATen/test/vulkan_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,3 +517,43 @@ TEST(VulkanTest, conv2dPrepack) {
}
ASSERT_TRUE(prepack_check);
}

// Compares Vulkan adaptive_avg_pool2d with the CPU result for a random
// {1, 2, 7, 7} float input pooled to 3x3. Returns early (passing) when
// Vulkan is not available.
TEST(VulkanTest, adaptive_avg_pool2d) {
  if (!at::vulkan::is_available()) {
    return;
  }

  // CPU reference.
  const auto cpu_options = at::TensorOptions(at::kCPU).dtype(at::kFloat);
  auto cpu_input = at::rand({1, 2, 7, 7}, cpu_options);
  auto expected = at::adaptive_avg_pool2d(cpu_input, {3, 3});

  // Same op on the Vulkan copy, then brought back to CPU for comparison.
  auto vulkan_input = cpu_input.vulkan();
  auto actual = at::adaptive_avg_pool2d(vulkan_input, {3, 3}).cpu();

  const auto check = almostEqual(actual, expected);
  if (!check) {
    // Dump both tensors to make a mismatch diagnosable from the log.
    std::cout << "expected:" << expected << std::endl;
    std::cout << "got:" << actual << std::endl;
  }
  ASSERT_TRUE(check);
}

// Compares Vulkan adaptive_avg_pool2d with the CPU result for a random
// {1, 1280, 7, 7} float input pooled to 1x1. Returns early (passing) when
// Vulkan is not available.
TEST(VulkanTest, adaptive_avg_pool2d_2) {
  if (!at::vulkan::is_available()) {
    return;
  }

  // CPU reference.
  const auto cpu_options = at::TensorOptions(at::kCPU).dtype(at::kFloat);
  auto cpu_input = at::rand({1, 1280, 7, 7}, cpu_options);
  auto expected = at::adaptive_avg_pool2d(cpu_input, {1, 1});

  // Same op on the Vulkan copy, then brought back to CPU for comparison.
  auto vulkan_input = cpu_input.vulkan();
  auto actual = at::adaptive_avg_pool2d(vulkan_input, {1, 1}).cpu();

  const auto check = almostEqual(actual, expected);
  if (!check) {
    // Dump both tensors to make a mismatch diagnosable from the log.
    std::cout << "expected:" << expected << std::endl;
    std::cout << "got:" << actual << std::endl;
  }
  ASSERT_TRUE(check);
}

0 comments on commit 4f72382

Please sign in to comment.