Add WIP Mojo D3D11VideoDecoder.

This is a WIP. Basic playback works, though EOS is ignored. There is quite a bit of thread-hopping, since the accelerator / decoder is run on the GPU main thread rather than the mojo thread. This will be eliminated in a later CL. Resource cleanup is of the "and the process will exit anyway" variety. It's based heavily on D3D11VideoDecodeAccelerator. Re-uses the H264 Accelerated Video Decoder for D3D11. The accelerator was written with the VDA in mind, so the VideoDecoder jumps through some minor hoops to use it also. It is intended that this will replace the VDA as soon as it reaches parity with it, and the accelerator can be modified for easier use with the VideoDecoder. Set "enable_d3d11_video_decoder=true" in gn config. This will enable the decoder, and: - enable MojoVideoDecoder - enable mojo CDM - set the media mojo host to be the GPU process. Also incorporates fixes to the d3d11_h264_accelerator from: https://chromium-review.googlesource.com/c/chromium/src/+/661237 by rkuroiwa@ . Bug: Change-Id: Ie5f2b651895bc812e5e6bba7c5eb302f2702a1b6 Reviewed-on: https://chromium-review.googlesource.com/693284 Commit-Queue: Frank Liberato <liberato@chromium.org> Reviewed-by: Dan Sanders <sandersd@chromium.org> Cr-Commit-Position: refs/heads/master@{#509615}
Denger-Network · Oct 18, 2017 · 33eeaea · 33eeaea
1 parent 0c68556
commit 33eeaea
Show file tree

Hide file tree

Showing 10 changed files with 866 additions and 17 deletions.
diff --git a/media/gpu/BUILD.gn b/media/gpu/BUILD.gn
@@ -17,6 +17,7 @@ buildflag_header("features") {
     "USE_V4L2_CODEC=$use_v4l2_codec",
     "USE_LIBV4L2=$use_v4lplugin",
     "ENABLE_MEDIA_CODEC_VIDEO_DECODER=$enable_media_codec_video_decoder",
+    "ENABLE_D3D11_VIDEO_DECODER=$enable_d3d11_video_decoder",
   ]
 }
 
@@ -376,6 +377,14 @@ component("gpu") {
       "/DELAYLOAD:mf.dll",
       "/DELAYLOAD:mfplat.dll",
     ]
+    if (enable_d3d11_video_decoder) {
+      sources += [
+        "windows/d3d11_video_decoder.cc",
+        "windows/d3d11_video_decoder.h",
+        "windows/d3d11_video_decoder_impl.cc",
+        "windows/d3d11_video_decoder_impl.h",
+      ]
+    }
   }
 }
 

diff --git a/media/gpu/d3d11_h264_accelerator.cc b/media/gpu/d3d11_h264_accelerator.cc
@@ -36,6 +36,17 @@ D3D11PictureBuffer::D3D11PictureBuffer(PictureBuffer picture_buffer,
                                        size_t level)
     : picture_buffer_(picture_buffer), level_(level) {}
 
+// Constructs a picture buffer for use by D3D11VideoDecoder.  Besides
+// |picture_buffer| and |level|, this keeps a copy of |texture_refs| (and so
+// a reference to each TextureRef) and a copy of every entry of
+// |mailbox_holders|.
+D3D11PictureBuffer::D3D11PictureBuffer(
+    PictureBuffer picture_buffer,
+    size_t level,
+    const std::vector<scoped_refptr<gpu::gles2::TextureRef>>& texture_refs,
+    const MailboxHolderArray& mailbox_holders)
+    : picture_buffer_(picture_buffer),
+      level_(level),
+      texture_refs_(texture_refs) {
+  // Copy element by element instead of using memcpy().  memcpy() is only well
+  // defined for trivially copyable types, and this code should not depend on
+  // gpu::MailboxHolder being (and remaining) one.
+  size_t plane = 0;
+  for (const auto& holder : mailbox_holders)
+    mailbox_holders_[plane++] = holder;
+}
+
 D3D11PictureBuffer::~D3D11PictureBuffer() {}
 
 bool D3D11PictureBuffer::Init(
@@ -159,9 +170,19 @@ bool D3D11H264Accelerator::SubmitFrameMetadata(
       static_cast<D3D11H264Picture*>(pic.get()));
 
   HRESULT hr;
-  hr = video_context_->DecoderBeginFrame(
-      video_decoder_.Get(), our_pic->picture->output_view_.Get(), 0, nullptr);
-  CHECK(SUCCEEDED(hr));
+  for (;;) {
+    hr = video_context_->DecoderBeginFrame(
+        video_decoder_.Get(), our_pic->picture->output_view_.Get(), 0, nullptr);
+
+    if (hr == E_PENDING || hr == D3DERR_WASSTILLDRAWING) {
+      // Hardware is busy.  We should make the call again.
+      // TODO(liberato): For now, just busy wait.
+      ;
+    } else {
+      CHECK(SUCCEEDED(hr));
+      break;
+    }
+  }
 
   sps_ = *sps;
   for (size_t i = 0; i < 16; i++) {
@@ -175,6 +196,8 @@ bool D3D11H264Accelerator::SubmitFrameMetadata(
 
   int i = 0;
 
+  // TODO(liberato): this is similar to H264Accelerator.  can they share code?
+
   for (auto it = dpb.begin(); it != dpb.end(); it++) {
     scoped_refptr<D3D11H264Picture> our_ref_pic(
         static_cast<D3D11H264Picture*>(it->get()));
@@ -188,7 +211,7 @@ bool D3D11H264Accelerator::SubmitFrameMetadata(
     field_order_cnt_list_[i][1] = our_ref_pic->bottom_field_order_cnt;
     frame_num_list_[i] = ref_frame_list_[i].AssociatedFlag
                              ? our_ref_pic->long_term_pic_num
-                             : our_ref_pic->pic_num;
+                             : our_ref_pic->frame_num;
     int ref = 3;
     used_for_reference_flags_ |= ref << (2 * i);
     non_existing_frame_flags_ |= (our_ref_pic->nonexisting) << i;
@@ -220,9 +243,7 @@ bool D3D11H264Accelerator::SubmitSlice(const H264PPS* pps,
                                        size_t size) {
   scoped_refptr<D3D11H264Picture> our_pic(
       static_cast<D3D11H264Picture*>(pic.get()));
-
   DXVA_PicParams_H264 pic_param = {};
-
 #define FROM_SPS_TO_PP(a) pic_param.a = sps_.a
 #define FROM_SPS_TO_PP2(a, b) pic_param.a = sps_.b
 #define FROM_PPS_TO_PP(a) pic_param.a = pps->a
@@ -232,7 +253,7 @@ bool D3D11H264Accelerator::SubmitSlice(const H264PPS* pps,
   FROM_SPS_TO_PP2(wFrameWidthInMbsMinus1, pic_width_in_mbs_minus1);
   FROM_SPS_TO_PP2(wFrameHeightInMbsMinus1, pic_height_in_map_units_minus1);
   pic_param.CurrPic.Index7Bits = our_pic->level_;
-  // UNUSED: pic_param.CurrPic.AssociatedFlag = slide_hdr->field_pic_flag
+  pic_param.CurrPic.AssociatedFlag = slice_hdr->bottom_field_flag;
   FROM_SPS_TO_PP2(num_ref_frames, max_num_ref_frames);
 
   FROM_SLICE_TO_PP(field_pic_flag);
@@ -248,10 +269,15 @@ bool D3D11H264Accelerator::SubmitSlice(const H264PPS* pps,
   pic_param.MbsConsecutiveFlag = 1;
   FROM_SPS_TO_PP(frame_mbs_only_flag);
   FROM_PPS_TO_PP(transform_8x8_mode_flag);
-  // UNUSED: Minlumabipredsize
-  // UNUSED: pic_param.IntraPicFlag = slice_hdr->IsISlice();
+  // TODO(liberato): sandersd@ believes that this should only be set for level
+  // >= 3.1 .  verify this and fix as needed.
+  pic_param.MinLumaBipredSize8x8Flag = 1;
+  pic_param.IntraPicFlag = slice_hdr->IsISlice();
   FROM_SPS_TO_PP(bit_depth_luma_minus8);
   FROM_SPS_TO_PP(bit_depth_chroma_minus8);
+  // The latest DXVA decoding guide says to set this to 3 if the software
+  // decoder (this class) is following the guide.
+  pic_param.Reserved16Bits = 3;
   memcpy(pic_param.RefFrameList, ref_frame_list_,
          sizeof pic_param.RefFrameList);
   if (pic_param.field_pic_flag && pic_param.CurrPic.AssociatedFlag) {
@@ -436,6 +462,8 @@ void D3D11H264Accelerator::SubmitSliceData() {
   hr = video_context_->SubmitDecoderBuffers(video_decoder_.Get(), 4, buffers);
   current_offset_ = 0;
   slice_info_.clear();
+  bitstream_buffer_bytes_ = nullptr;
+  bitstream_buffer_size_ = 0;
 }
 
 bool D3D11H264Accelerator::SubmitDecode(const scoped_refptr<H264Picture>& pic) {
@@ -447,6 +475,18 @@ bool D3D11H264Accelerator::SubmitDecode(const scoped_refptr<H264Picture>& pic) {
   return true;
 }
 
+// Releases the bitstream decoder buffer and clears the bookkeeping for it.
+// Does nothing when |bitstream_buffer_bytes_| is not set.
+void D3D11H264Accelerator::Reset() {
+  if (!bitstream_buffer_bytes_)
+    return;
+
+  const HRESULT release_result = video_context_->ReleaseDecoderBuffer(
+      video_decoder_.Get(), D3D11_VIDEO_DECODER_BUFFER_BITSTREAM);
+
+  // Forget the buffer before checking the result; a failed release is fatal.
+  bitstream_buffer_bytes_ = nullptr;
+  bitstream_buffer_size_ = 0;
+  current_offset_ = 0;
+  CHECK(SUCCEEDED(release_result));
+}
+
 bool D3D11H264Accelerator::OutputPicture(
     const scoped_refptr<H264Picture>& pic) {
   scoped_refptr<D3D11H264Picture> our_pic(

diff --git a/media/gpu/d3d11_h264_accelerator.h b/media/gpu/d3d11_h264_accelerator.h
@@ -12,6 +12,8 @@
 #include <vector>
 
 #include "base/win/scoped_comptr.h"
+#include "gpu/command_buffer/service/texture_manager.h"
+#include "media/base/video_frame.h"
 #include "media/gpu/h264_decoder.h"
 #include "media/gpu/h264_dpb.h"
 #include "media/video/picture.h"
@@ -22,9 +24,18 @@
 namespace media {
 class D3D11H264Accelerator;
 
+// This must be freed on the main thread, since it has things like |gl_image_|
+// and |texture_refs_|.
 class D3D11PictureBuffer {
  public:
+  using MailboxHolderArray = gpu::MailboxHolder[VideoFrame::kMaxPlanes];
+
   D3D11PictureBuffer(PictureBuffer picture_buffer, size_t level);
+  D3D11PictureBuffer(
+      PictureBuffer picture_buffer,
+      size_t level,
+      const std::vector<scoped_refptr<gpu::gles2::TextureRef>>& texture_refs,
+      const MailboxHolderArray& mailbox_holders);
   ~D3D11PictureBuffer();
 
   bool Init(base::win::ScopedComPtr<ID3D11VideoDevice> video_device,
@@ -41,6 +52,11 @@ class D3D11PictureBuffer {
   void set_in_picture_use(bool use) { in_picture_use_ = use; }
   scoped_refptr<gl::GLImage> gl_image() const { return gl_image_; }
 
+  // For D3D11VideoDecoder.
+  const MailboxHolderArray& mailbox_holders() const { return mailbox_holders_; }
+  // Shouldn't be here, but simpler for now.
+  base::TimeDelta timestamp_;
+
  private:
   friend class D3D11H264Accelerator;
 
@@ -53,6 +69,10 @@ class D3D11PictureBuffer {
   EGLStreamKHR stream_;
   scoped_refptr<gl::GLImage> gl_image_;
 
+  // For D3D11VideoDecoder.
+  std::vector<scoped_refptr<gpu::gles2::TextureRef>> texture_refs_;
+  MailboxHolderArray mailbox_holders_;
+
   DISALLOW_COPY_AND_ASSIGN(D3D11PictureBuffer);
 };
 
@@ -90,7 +110,7 @@ class D3D11H264Accelerator : public H264Decoder::H264Accelerator {
                    const uint8_t* data,
                    size_t size) override;
   bool SubmitDecode(const scoped_refptr<H264Picture>& pic) override;
-  void Reset() override {}
+  void Reset() override;
   bool OutputPicture(const scoped_refptr<H264Picture>& pic) override;
 
  private:
@@ -115,8 +135,8 @@ class D3D11H264Accelerator : public H264Decoder::H264Accelerator {
   // Information that's accumulated during slices and submitted at the end
   std::vector<DXVA_Slice_H264_Short> slice_info_;
   size_t current_offset_ = 0;
-  size_t bitstream_buffer_size_;
-  uint8_t* bitstream_buffer_bytes_;
+  size_t bitstream_buffer_size_ = 0;
+  uint8_t* bitstream_buffer_bytes_ = nullptr;
 
   DISALLOW_COPY_AND_ASSIGN(D3D11H264Accelerator);
 };

diff --git a/media/gpu/windows/d3d11_video_decoder.cc b/media/gpu/windows/d3d11_video_decoder.cc
@@ -0,0 +1,145 @@
+// Copyright 2017 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/gpu/windows/d3d11_video_decoder.h"
+
+#include "base/bind.h"
+#include "base/callback.h"
+#include "media/base/bind_to_current_loop.h"
+#include "media/base/decoder_buffer.h"
+#include "media/base/video_codecs.h"
+#include "media/base/video_decoder_config.h"
+#include "media/base/video_frame.h"
+
+namespace {
+
+// Runs |cb| with |args|, but only if |weak_ptr| is still non-null.  Otherwise
+// this is a no-op.
+template <typename T, typename... Args>
+void CallbackOnProperThread(base::WeakPtr<T> weak_ptr,
+                            base::Callback<void(Args...)> cb,
+                            Args... args) {
+  if (!weak_ptr.get())
+    return;
+
+  cb.Run(args...);
+}
+
+// Returns a callback that, when run, switches to the thread on which
+// BindToCurrentThreadIfWeakPtr() was called and then runs |cb| there, provided
+// that |weak_ptr| is still non-null on that thread.
+//
+// This differs from calling BindToCurrentLoop() directly in that the weak
+// pointer is checked.  If |cb| were a method of |T|, BindToCurrentLoop() alone
+// would be enough.  Here, |cb| is an unrelated callback that should only be
+// run while the |T| has not been destroyed.
+//
+// An alternative would be a method along the lines of
+// template<CB, ...> D3D11VideoDecoder::CallSomeCallback(CB, ...) that is bound
+// via BindToCurrentLoop() directly.
+template <typename T, typename... Args>
+base::Callback<void(Args...)> BindToCurrentThreadIfWeakPtr(
+    base::WeakPtr<T> weak_ptr,
+    base::Callback<void(Args...)> cb) {
+  // First guard |cb| with the weak pointer check, then add the thread hop.
+  base::Callback<void(Args...)> guarded_cb =
+      base::Bind(&CallbackOnProperThread<T, Args...>, weak_ptr, cb);
+  return media::BindToCurrentLoop(guarded_cb);
+}
+
+}  // namespace
+
+namespace media {
+
+// Creates the decoder and its |impl_|.  |gpu_task_runner| becomes
+// |impl_task_runner_|, the task runner that all calls into |impl_| are posted
+// to.  |get_stub_cb| is handed to |impl_| unchanged.  |output_cb| is wrapped
+// so that frames are delivered on the constructing thread.
+D3D11VideoDecoder::D3D11VideoDecoder(
+    scoped_refptr<base::SingleThreadTaskRunner> gpu_task_runner,
+    base::Callback<gpu::GpuCommandBufferStub*()> get_stub_cb,
+    OutputWithReleaseMailboxCB output_cb)
+    : impl_task_runner_(std::move(gpu_task_runner)), weak_factory_(this) {
+  // The output callback given to |impl_| hops to our thread and outputs the
+  // video frame there, together with a release callback that hops back to the
+  // impl thread once the frame is released.
+  auto output_on_our_thread = media::BindToCurrentLoop(
+      base::Bind(&D3D11VideoDecoder::OutputWithThreadHoppingRelease,
+                 weak_factory_.GetWeakPtr(), std::move(output_cb)));
+
+  // |impl_| is created on the wrong thread, but it is never used here.
+  impl_ = base::MakeUnique<D3D11VideoDecoderImpl>(
+      get_stub_cb, std::move(output_on_our_thread));
+  impl_weak_ = impl_->GetWeakPtr();
+}
+
+// Hands |impl_| to |impl_task_runner_| for deletion rather than deleting it
+// on this thread.
+D3D11VideoDecoder::~D3D11VideoDecoder() {
+  // Once the posted deletion runs, callbacks into |impl_| that are still
+  // pending are cancelled via |impl_weak_|.  Callbacks coming out of |impl_|
+  // are cancelled by |weak_factory_| as soon as this destructor returns.
+  auto doomed_impl = std::move(impl_);
+  impl_task_runner_->DeleteSoon(FROM_HERE, std::move(doomed_impl));
+}
+
+// Returns the fixed display name of this decoder.
+std::string D3D11VideoDecoder::GetDisplayName() const {
+  return std::string("D3D11VideoDecoder");
+}
+
+// Rejects any non-H264 |config| by running |init_cb| with false right away.
+// Otherwise forwards initialization to |impl_| on |impl_task_runner_|.
+void D3D11VideoDecoder::Initialize(const VideoDecoderConfig& config,
+                                   bool low_delay,
+                                   CdmContext* cdm_context,
+                                   const InitCB& init_cb,
+                                   const OutputCB& output_cb) {
+  const auto profile = config.profile();
+  if (profile < H264PROFILE_MIN || profile > H264PROFILE_MAX) {
+    init_cb.Run(false);
+    return;
+  }
+
+  // Wrap the caller's callbacks so that they run on this thread, and only
+  // while we still exist, instead of on whatever thread |impl_| runs them.
+  auto init_on_our_thread =
+      BindToCurrentThreadIfWeakPtr(weak_factory_.GetWeakPtr(), init_cb);
+  auto output_on_our_thread =
+      BindToCurrentThreadIfWeakPtr(weak_factory_.GetWeakPtr(), output_cb);
+
+  // TODO(liberato): what's the lifetime of |cdm_context|?
+  impl_task_runner_->PostTask(
+      FROM_HERE,
+      base::BindOnce(&VideoDecoder::Initialize, impl_weak_, config, low_delay,
+                     cdm_context, std::move(init_on_our_thread),
+                     std::move(output_on_our_thread)));
+}
+
+// Forwards |buffer| to |impl_| on |impl_task_runner_|.  |decode_cb| is
+// wrapped so that it runs back on this thread, and only while we still exist.
+void D3D11VideoDecoder::Decode(const scoped_refptr<DecoderBuffer>& buffer,
+                               const DecodeCB& decode_cb) {
+  auto decode_done_on_our_thread =
+      BindToCurrentThreadIfWeakPtr(weak_factory_.GetWeakPtr(), decode_cb);
+  impl_task_runner_->PostTask(
+      FROM_HERE, base::BindOnce(&VideoDecoder::Decode, impl_weak_, buffer,
+                                std::move(decode_done_on_our_thread)));
+}
+
+// Forwards the reset request to |impl_| on |impl_task_runner_|.  |closure| is
+// wrapped so that it runs back on this thread, and only while we still exist.
+void D3D11VideoDecoder::Reset(const base::Closure& closure) {
+  auto reset_done_on_our_thread =
+      BindToCurrentThreadIfWeakPtr(weak_factory_.GetWeakPtr(), closure);
+  impl_task_runner_->PostTask(
+      FROM_HERE, base::BindOnce(&VideoDecoder::Reset, impl_weak_,
+                                std::move(reset_done_on_our_thread)));
+}
+
+// Returns whatever |impl_| reports for NeedsBitstreamConversion().
+bool D3D11VideoDecoder::NeedsBitstreamConversion() const {
+  // This calls into |impl_| directly on the wrong thread, which is treated as
+  // acceptable for this query.
+  const bool needs_conversion = impl_->NeedsBitstreamConversion();
+  return needs_conversion;
+}
+
+// Returns whatever |impl_| reports for CanReadWithoutStalling().
+bool D3D11VideoDecoder::CanReadWithoutStalling() const {
+  // This calls into |impl_| directly on the wrong thread, which is treated as
+  // acceptable for this query.
+  const bool can_read = impl_->CanReadWithoutStalling();
+  return can_read;
+}
+
+// Returns whatever |impl_| reports for GetMaxDecodeRequests().
+int D3D11VideoDecoder::GetMaxDecodeRequests() const {
+  // This calls into |impl_| directly on the wrong thread, which is treated as
+  // acceptable for this query.
+  const int max_requests = impl_->GetMaxDecodeRequests();
+  return max_requests;
+}
+
+// Outputs |video_frame| through |output_cb|.  Called on our thread.  The
+// release callback passed along is not |impl_thread_cb| itself but a wrapper
+// that posts |impl_thread_cb| back to the impl thread.
+void D3D11VideoDecoder::OutputWithThreadHoppingRelease(
+    OutputWithReleaseMailboxCB output_cb,
+    VideoFrame::ReleaseMailboxCB impl_thread_cb,
+    const scoped_refptr<VideoFrame>& video_frame) {
+  // Route the release through OnMailboxReleased() so that it jumps back to
+  // the impl thread.
+  auto hopping_release_cb =
+      base::Bind(&D3D11VideoDecoder::OnMailboxReleased,
+                 weak_factory_.GetWeakPtr(), std::move(impl_thread_cb));
+  output_cb.Run(std::move(hopping_release_cb), video_frame);
+}
+
+// Posts |impl_thread_cb|, bound to |token|, to |impl_task_runner_|.
+void D3D11VideoDecoder::OnMailboxReleased(
+    VideoFrame::ReleaseMailboxCB impl_thread_cb,
+    const gpu::SyncToken& token) {
+  auto notify_impl = base::Bind(impl_thread_cb, token);
+  impl_task_runner_->PostTask(FROM_HERE, std::move(notify_impl));
+}
+
+}  // namespace media