[vulkan] adaptive_avg_pool2d (pytorch#41220)

Summary: Pull Request resolved: pytorch#41220

Test Plan: Imported from OSS

Reviewed By: AshkanAliabadi

Differential Revision: D22754943

Pulled By: IvanKobzarev

fbshipit-source-id: 91a94f32db005ebb693384f4d27efe66e2c33a14
zmmwl · Jul 28, 2020 · 4f72382 · 4f72382
1 parent 0a09601
commit 4f72382
Show file tree

Hide file tree

Showing 7 changed files with 175 additions and 0 deletions.
diff --git a/aten/src/ATen/native/AdaptiveAveragePooling.cpp b/aten/src/ATen/native/AdaptiveAveragePooling.cpp
@@ -2,6 +2,9 @@
 #include <ATen/NativeFunctions.h>
 #include <ATen/Parallel.h>
 #include <tuple>
+#ifdef USE_VULKAN
+#include <ATen/native/vulkan/VulkanAten.h>
+#endif
 
 
 namespace at {
@@ -325,6 +328,11 @@ namespace {
     if (input.is_mkldnn()) {
       return at::mkldnn_adaptive_avg_pool2d(input, output_size);
     }
+#ifdef USE_VULKAN
+    if (input.is_vulkan()) {
+      return at::native::vulkan_adaptive_avg_pool2d(input, output_size);
+    }
+#endif
 
     // TODO: fastpath for Channels_last should be explored later;
     if (input.suggest_memory_format() == at::MemoryFormat::Contiguous && !input.is_quantized() && output_size[0] == 1 && output_size[1] == 1) {

diff --git a/aten/src/ATen/native/vulkan/VulkanAten.cpp b/aten/src/ATen/native/vulkan/VulkanAten.cpp
@@ -142,6 +142,28 @@ at::Tensor upsample_nearest2d_vulkan(
   return output;
 }
 
+// Adaptive average pooling for Vulkan tensors.
+//
+// input:      4-dimensional Vulkan tensor; its sizes are read as
+//             {N, C, H, W}.
+// outputSize: requested spatial size, read as {OH, OW}.
+//
+// Returns a newly allocated Vulkan tensor of sizes {N, C, OH, OW}.
+// Raises through TORCH_CHECK when `input` is not 4-dimensional or when
+// `outputSize` does not hold exactly two values.
+at::Tensor vulkan_adaptive_avg_pool2d(
+    const at::Tensor& input,
+    IntArrayRef outputSize) {
+  // Both conditions depend on caller-supplied arguments, so they are reported
+  // as ordinary argument errors instead of internal-invariant failures.
+  TORCH_CHECK(
+      input.dim() == 4,
+      "vulkan_adaptive_avg_pool2d expects 4-dimensional input");
+  // outputSize is indexed at [0] and [1] below; reject any other length up
+  // front instead of reading past the end of the list.
+  TORCH_CHECK(
+      outputSize.size() == 2,
+      "vulkan_adaptive_avg_pool2d expects output_size to have 2 elements");
+
+  const auto& x = vtensor_from_vulkan(input);
+  const auto inputSize = input.sizes();
+  const auto in = inputSize[0];
+  const auto ic = inputSize[1];
+  const auto ih = inputSize[2];
+  const auto iw = inputSize[3];
+
+  const auto oh = outputSize[0];
+  const auto ow = outputSize[1];
+
+  // The destination storage is allocated before the op is dispatched.
+  Tensor output = empty_vulkan({in, ic, oh, ow}, input.options(), {});
+  VulkanTensor& y = vtensor_from_vulkan(output);
+  y.allocate_storage();
+  vulkan::detail::adaptive_avg_pool2d(y, x, ih, iw, oh, ow, in, ic);
+  return output;
+}
+
 Tensor vulkan_add(const Tensor& self, const Tensor& other, Scalar alpha) {
   VulkanTensor& x = vtensor_from_vulkan(self);
   VulkanTensor& y = vtensor_from_vulkan(other);

diff --git a/aten/src/ATen/native/vulkan/VulkanAten.h b/aten/src/ATen/native/vulkan/VulkanAten.h
@@ -30,5 +30,9 @@ at::Tensor vulkan_convolution_prepacked(
     const float output_min,
     const float output_max);
 
+// Vulkan implementation of adaptive_avg_pool2d: takes the input tensor and
+// the requested output size and returns the pooled tensor.
+at::Tensor vulkan_adaptive_avg_pool2d(
+    const at::Tensor& input,
+    at::IntArrayRef output_size);
+
 } // namespace native
 } // namespace at
diff --git a/aten/src/ATen/native/vulkan/VulkanOps.cpp b/aten/src/ATen/native/vulkan/VulkanOps.cpp
@@ -70,6 +70,56 @@ void upsample_nearest2d(
   vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
 }
 
+// Records, submits and waits for the adaptive_avg_pool2d compute shader.
+//
+// output: image bound as the storage image the shader writes (binding 0).
+// input:  image bound for shader reads (binding 1).
+// IH, IW: input height and width forwarded to the shader.
+// OH, OW: output height and width; forwarded to the shader and used as the
+//         y and x extents of the dispatch.
+// IN, IC: their product is used as the z extent of the dispatch.
+void adaptive_avg_pool2d(
+    VulkanTensor& output,
+    const VulkanTensor& input,
+    const int64_t IH,
+    const int64_t IW,
+    const int64_t OH,
+    const int64_t OW,
+    const int64_t IN,
+    const int64_t IC) {
+  auto device = context().device();
+  const int64_t C = IN * IC;
+
+  // Host-side copy of the uniform block bound at binding 2. The field order
+  // (IW, IH, OW, OH) has to match the `constBlock` declared in
+  // adaptive_avg_pool2d.glsl.
+  struct ConstBlock {
+    int32_t IW;
+    int32_t IH;
+    int32_t OW;
+    int32_t OH;
+  };
+  // int64_t -> int32_t is a narrowing conversion, which brace initialization
+  // does not permit implicitly; spell the conversions out.
+  ConstBlock cb{
+      static_cast<int32_t>(IW),
+      static_cast<int32_t>(IH),
+      static_cast<int32_t>(OW),
+      static_cast<int32_t>(OH)};
+  VBuffer constBuffer =
+      makeUniformConstBuffer(static_cast<void*>(&cb), sizeof(cb));
+
+  VkDescriptorSetLayout descriptorSetLayout{};
+  VkDescriptorPool descriptorPool{};
+  VkDescriptorSet descriptorSet{};
+  // One descriptor type per binding, in binding order 0, 1, 2.
+  std::vector<VkDescriptorType> descriptorTypes{
+      VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+      VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER};
+  createDescriptorSetLayoutSinglePool(
+      device,
+      descriptorTypes,
+      &descriptorSetLayout,
+      &descriptorPool,
+      &descriptorSet);
+
+  output.image()->bindStorageImage(descriptorSet, 0);
+  input.image()->bindShaderRead(descriptorSet, 1);
+  constBuffer.bind(descriptorSet, 2);
+
+  WorkGroupSize workGroupSize{8, 8, 1};
+  auto& computeUnit = context().computeUnitFactory().get(
+      GLSL_SPV(adaptive_avg_pool2d), descriptorSetLayout, workGroupSize);
+  computeUnit.createCommandBuffer(descriptorSet);
+  input.image()->addImageMemoryBarrierToShaderRead(computeUnit.commandBuffer());
+  computeUnit.dispatchCommandBuffer(OW, OH, C, workGroupSize);
+  computeUnit.endCommandBuffer();
+  computeUnit.submitAndWaitCommandBuffer();
+
+  // NOTE(review): these two calls are only reached when nothing above
+  // throws; confirm whether the helpers can throw and, if so, whether the
+  // pool and layout need a scope guard.
+  vkDestroyDescriptorPool(device, descriptorPool, nullptr);
+  vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);
+}
+
 void add(
     VulkanTensor& output,
     const VulkanTensor& input0,

diff --git a/aten/src/ATen/native/vulkan/VulkanOps.h b/aten/src/ATen/native/vulkan/VulkanOps.h
@@ -19,6 +19,16 @@ void upsample_nearest2d(
     float scaleH,
     float scaleW);
 
+// Adaptive average pooling from `input` into `output`.
+// IH/IW are the input height/width, OH/OW the output height/width, and
+// IN/IC the two leading (batch and channel) sizes of the input.
+void adaptive_avg_pool2d(
+    VulkanTensor& output,
+    const VulkanTensor& input,
+    int64_t IH,
+    int64_t IW,
+    int64_t OH,
+    int64_t OW,
+    int64_t IN,
+    int64_t IC);
+
 void add(
     VulkanTensor& output,
     const VulkanTensor& input0,

diff --git a/aten/src/ATen/native/vulkan/glsl/adaptive_avg_pool2d.glsl b/aten/src/ATen/native/vulkan/glsl/adaptive_avg_pool2d.glsl
@@ -0,0 +1,41 @@
+#version 450 core
+// Adaptive average pooling: each invocation with pos.x < OW and pos.y < OH
+// averages one window of uInput and stores the result at the same (x, y, z)
+// position of uOutput.
+layout(std430) buffer;
+layout(std430) uniform;
+layout(set = 0, rgba16f, binding = 0) writeonly highp uniform image3D uOutput;
+layout(set = 0, binding = 1) uniform highp sampler3D uInput;
+// Input (IW, IH) and output (OW, OH) extents supplied by the host.
+layout(set = 0, binding = 2) uniform constBlock {
+  int IW;
+  int IH;
+  int OW;
+  int OH;
+}
+uConstBlock;
+
+layout(local_size_x_id = 1, local_size_y_id = 2, local_size_z_id = 3) in;
+
+void main() {
+  ivec3 pos = ivec3(gl_GlobalInvocationID);
+  int ow = uConstBlock.OW;
+  int oh = uConstBlock.OH;
+  // Invocations outside the output extent in x or y do nothing.
+  if (pos.x < ow && pos.y < oh) {
+    int iw = uConstBlock.IW;
+    int ih = uConstBlock.IH;
+
+    // Window bounds: start = floor(p * in / out), end = ceil((p + 1) * in / out).
+    // They are computed with integer division so the result is exact; a
+    // floating-point quotient that is slightly off an integer would move the
+    // floor/ceil by one texel. Inside this branch ow > pos.x >= 0 and
+    // oh > pos.y >= 0, so both divisors are at least 1.
+    int sx = (pos.x * iw) / ow;
+    int sy = (pos.y * ih) / oh;
+    int ex = ((pos.x + 1) * iw + ow - 1) / ow;
+    int ey = ((pos.y + 1) * ih + oh - 1) / oh;
+
+    // Reciprocal of the number of texels in the window.
+    vec4 r = vec4(1.0) / float(ex - sx) / float(ey - sy);
+    vec4 acc = vec4(0);
+
+    int xi, yi;
+    for (xi = sx; xi < ex; ++xi) {
+      for (yi = sy; yi < ey; ++yi) {
+        acc += texelFetch(uInput, ivec3(xi, yi, pos.z), 0);
+      }
+    }
+
+    imageStore(uOutput, pos, r * acc);
+  }
+}
diff --git a/aten/src/ATen/test/vulkan_test.cpp b/aten/src/ATen/test/vulkan_test.cpp
@@ -517,3 +517,43 @@ TEST(VulkanTest, conv2dPrepack) {
   }
   ASSERT_TRUE(prepack_check);
 }
+
+// Compares the Vulkan adaptive_avg_pool2d against the CPU result for a
+// {1, 2, 7, 7} input pooled to {3, 3}.
+TEST(VulkanTest, adaptive_avg_pool2d) {
+  // Nothing to verify when the Vulkan backend is unavailable.
+  if (!at::vulkan::is_available()) {
+    return;
+  }
+
+  // Reference result computed on the CPU.
+  const auto cpu_options = at::TensorOptions(at::kCPU).dtype(at::kFloat);
+  const auto cpu_input = at::rand({1, 2, 7, 7}, cpu_options);
+  const auto cpu_reference = at::adaptive_avg_pool2d(cpu_input, {3, 3});
+
+  // Same op on the Vulkan copy of the input, then copied back to the CPU.
+  const auto vulkan_input = cpu_input.vulkan();
+  const auto vulkan_output = at::adaptive_avg_pool2d(vulkan_input, {3, 3});
+  const auto cpu_result = vulkan_output.cpu();
+
+  const bool matches = almostEqual(cpu_result, cpu_reference);
+  if (!matches) {
+    // Dump both tensors to make a mismatch diagnosable from the test log.
+    std::cout << "expected:" << cpu_reference << std::endl;
+    std::cout << "got:" << cpu_result << std::endl;
+  }
+  ASSERT_TRUE(matches);
+}
+
+// Compares the Vulkan adaptive_avg_pool2d against the CPU result for a
+// {1, 1280, 7, 7} input pooled to {1, 1}.
+TEST(VulkanTest, adaptive_avg_pool2d_2) {
+  // Nothing to verify when the Vulkan backend is unavailable.
+  if (!at::vulkan::is_available()) {
+    return;
+  }
+
+  const auto source = at::rand(
+      {1, 1280, 7, 7}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
+
+  // CPU result is the reference; the Vulkan result is copied back to the CPU
+  // before the comparison.
+  const auto reference = at::adaptive_avg_pool2d(source, {1, 1});
+  const auto actual = at::adaptive_avg_pool2d(source.vulkan(), {1, 1}).cpu();
+
+  const bool same = almostEqual(actual, reference);
+  if (!same) {
+    std::cout << "expected:" << reference << std::endl;
+    std::cout << "got:" << actual << std::endl;
+  }
+  ASSERT_TRUE(same);
+}