Skip to content

Commit

Permalink
Add HTMLMediaElement.preservesPitch
Browse files Browse the repository at this point in the history
To adjust the playback rate of audio, we use two algorithms: resampling,
which shifts the audio's pitch, and WSOLA, which doesn't. At rates close
to 1.0, WSOLA can introduce undesirable artifacts for certain types of,
audio content, so we use resampling instead. However, this pitch shift
is noticeable can be less desirable than the WSOLA artifacts to some
users.

This CL implements the preservesPitch flag, which is already present in
Firefox and Safari. The flag allows web developers to choose between
pitch preserving and pitch shifting time stretch algorithms (resampling
and WSOLA in our case). This will allow users to chose the algorithm
that suites their needs at playback rates close to 1.0. It will also
allow users to use resampling at lower or higher playback rates, when
pitch shifting is desirable for aesthetic or performance purposes.

Bug: 1072067, 1096238
Change-Id: Ie0e7ff337da77c902a12259c30c33125104101ef
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2255179
Commit-Queue: Thomas Guilbert <tguilbert@chromium.org>
Reviewed-by: Chris Harrelson <chrishtr@chromium.org>
Reviewed-by: Dale Curtis <dalecurtis@chromium.org>
Cr-Commit-Position: refs/heads/master@{#782141}
  • Loading branch information
tguilbert-google authored and Commit Bot committed Jun 24, 2020
1 parent 9a07a0f commit dd0e3e6
Show file tree
Hide file tree
Showing 37 changed files with 216 additions and 35 deletions.
4 changes: 4 additions & 0 deletions media/base/audio_renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ class MEDIA_EXPORT AudioRenderer {
// (restore UA default).
virtual void SetLatencyHint(base::Optional<base::TimeDelta> latency_hint) = 0;

// Sets a flag indicating that the AudioRenderer should use or avoid pitch
// preservation when playing back at speeds other than 1.0.
virtual void SetPreservesPitch(bool preserves_pitch) = 0;

private:
DISALLOW_COPY_AND_ASSIGN(AudioRenderer);
};
Expand Down
5 changes: 4 additions & 1 deletion media/base/mock_filters.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class MockPipeline : public Pipeline {
MOCK_CONST_METHOD0(GetVolume, float());
MOCK_METHOD1(SetVolume, void(float));
MOCK_METHOD1(SetLatencyHint, void(base::Optional<base::TimeDelta>));
MOCK_METHOD1(SetPreservesPitch, void(bool));

// TODO(sandersd): These should probably have setters too.
MOCK_CONST_METHOD0(GetMediaTime, base::TimeDelta());
Expand Down Expand Up @@ -357,6 +358,7 @@ class MockAudioRenderer : public AudioRenderer {
MOCK_METHOD1(SetVolume, void(float volume));
MOCK_METHOD1(SetLatencyHint,
void(base::Optional<base::TimeDelta> latency_hint));
MOCK_METHOD1(SetPreservesPitch, void(bool));

private:
DISALLOW_COPY_AND_ASSIGN(MockAudioRenderer);
Expand All @@ -378,7 +380,8 @@ class MockRenderer : public Renderer {
RendererClient* client,
PipelineStatusCallback& init_cb));
MOCK_METHOD1(SetLatencyHint, void(base::Optional<base::TimeDelta>));
void Flush(base::OnceClosure flush_cb) { OnFlush(flush_cb); }
MOCK_METHOD1(SetPreservesPitch, void(bool));
void Flush(base::OnceClosure flush_cb) override { OnFlush(flush_cb); }
MOCK_METHOD1(OnFlush, void(base::OnceClosure& flush_cb));
MOCK_METHOD1(StartPlayingFrom, void(base::TimeDelta timestamp));
MOCK_METHOD1(SetPlaybackRate, void(double playback_rate));
Expand Down
4 changes: 4 additions & 0 deletions media/base/pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,10 @@ class MEDIA_EXPORT Pipeline {
// can choose its own default.
virtual void SetLatencyHint(base::Optional<base::TimeDelta> latency_hint) = 0;

// Sets whether pitch adjustment should be applied when the playback rate is
// different than 1.0.
virtual void SetPreservesPitch(bool preserves_pitch) = 0;

// Returns the current media playback time, which progresses from 0 until
// GetMediaDuration().
virtual base::TimeDelta GetMediaTime() const = 0;
Expand Down
26 changes: 26 additions & 0 deletions media/base/pipeline_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class PipelineImpl::RendererWrapper : public DemuxerHost,
void SetPlaybackRate(double playback_rate);
void SetVolume(float volume);
void SetLatencyHint(base::Optional<base::TimeDelta> latency_hint);
void SetPreservesPitch(bool preserves_pitch);
base::TimeDelta GetMediaTime() const;
Ranges<base::TimeDelta> GetBufferedTimeRanges() const;
bool DidLoadingProgress();
Expand Down Expand Up @@ -193,6 +194,9 @@ class PipelineImpl::RendererWrapper : public DemuxerHost,
base::Optional<base::TimeDelta> latency_hint_;
CdmContext* cdm_context_;

// By default, apply pitch adjustments.
bool preserves_pitch_ = true;

// Lock used to serialize |shared_state_|.
// TODO(crbug.com/893739): Add GUARDED_BY annotations.
mutable base::Lock shared_state_lock_;
Expand Down Expand Up @@ -482,6 +486,17 @@ void PipelineImpl::RendererWrapper::SetLatencyHint(
shared_state_.renderer->SetLatencyHint(latency_hint_);
}

void PipelineImpl::RendererWrapper::SetPreservesPitch(bool preserves_pitch) {
DCHECK(media_task_runner_->BelongsToCurrentThread());

if (preserves_pitch_ == preserves_pitch)
return;

preserves_pitch_ = preserves_pitch;
if (shared_state_.renderer)
shared_state_.renderer->SetPreservesPitch(preserves_pitch_);
}

base::TimeDelta PipelineImpl::RendererWrapper::GetMediaTime() const {
DCHECK(main_task_runner_->BelongsToCurrentThread());

Expand Down Expand Up @@ -1041,6 +1056,8 @@ void PipelineImpl::RendererWrapper::InitializeRenderer(
if (latency_hint_)
shared_state_.renderer->SetLatencyHint(latency_hint_);

shared_state_.renderer->SetPreservesPitch(preserves_pitch_);

shared_state_.renderer->Initialize(demuxer_, this, std::move(done_cb));
}

Expand Down Expand Up @@ -1357,6 +1374,15 @@ void PipelineImpl::SetLatencyHint(
base::Unretained(renderer_wrapper_.get()), latency_hint));
}

void PipelineImpl::SetPreservesPitch(bool preserves_pitch) {
DCHECK(thread_checker_.CalledOnValidThread());

media_task_runner_->PostTask(
FROM_HERE, base::BindOnce(&RendererWrapper::SetPreservesPitch,
base::Unretained(renderer_wrapper_.get()),
preserves_pitch));
}

base::TimeDelta PipelineImpl::GetMediaTime() const {
DCHECK(thread_checker_.CalledOnValidThread());

Expand Down
1 change: 1 addition & 0 deletions media/base/pipeline_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class MEDIA_EXPORT PipelineImpl : public Pipeline {
float GetVolume() const override;
void SetVolume(float volume) override;
void SetLatencyHint(base::Optional<base::TimeDelta> latency_hint) override;
void SetPreservesPitch(bool preserves_pitch) override;
base::TimeDelta GetMediaTime() const override;
Ranges<base::TimeDelta> GetBufferedTimeRanges() const override;
base::TimeDelta GetMediaDuration() const override;
Expand Down
24 changes: 24 additions & 0 deletions media/base/pipeline_impl_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ class PipelineImplTest : public ::testing::Test {
.WillRepeatedly(Return(base::TimeDelta()));

EXPECT_CALL(*demuxer_, GetStartTime()).WillRepeatedly(Return(start_time_));

EXPECT_CALL(*renderer_, SetPreservesPitch(true)).Times(AnyNumber());
}

~PipelineImplTest() override {
Expand Down Expand Up @@ -302,6 +304,7 @@ class PipelineImplTest : public ::testing::Test {
// |renderer_| has been deleted, replace it.
scoped_renderer_.reset(new StrictMock<MockRenderer>());
renderer_ = scoped_renderer_.get();
EXPECT_CALL(*renderer_, SetPreservesPitch(_)).Times(AnyNumber());
}

void ExpectResume(const base::TimeDelta& seek_time) {
Expand Down Expand Up @@ -606,6 +609,10 @@ TEST_F(PipelineImplTest, SuspendResume) {
EXPECT_EQ(stats.video_memory_usage,
pipeline_->GetStatistics().video_memory_usage);

// Make sure the preserves pitch flag is preserved between after resuming.
EXPECT_CALL(*renderer_, SetPreservesPitch(false)).Times(1);
pipeline_->SetPreservesPitch(false);

ExpectSuspend();
DoSuspend();

Expand All @@ -614,6 +621,8 @@ TEST_F(PipelineImplTest, SuspendResume) {

base::TimeDelta expected = base::TimeDelta::FromSeconds(2000);
ExpectResume(expected);
EXPECT_CALL(*renderer_, SetPreservesPitch(false)).Times(1);

DoResume(expected);
}

Expand All @@ -631,6 +640,21 @@ TEST_F(PipelineImplTest, SetVolume) {
base::RunLoop().RunUntilIdle();
}

TEST_F(PipelineImplTest, SetPreservesPitch) {
CreateAudioStream();
SetDemuxerExpectations();

// The audio renderer preserve pitch by default.
EXPECT_CALL(*renderer_, SetPreservesPitch(true));
StartPipelineAndExpect(PIPELINE_OK);
base::RunLoop().RunUntilIdle();

// Changes to the preservesPitch flag should be propagated.
EXPECT_CALL(*renderer_, SetPreservesPitch(false));
pipeline_->SetPreservesPitch(false);
base::RunLoop().RunUntilIdle();
}

TEST_F(PipelineImplTest, Properties) {
CreateVideoStream();
const auto kDuration = base::TimeDelta::FromSeconds(100);
Expand Down
4 changes: 4 additions & 0 deletions media/base/renderer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,8 @@ void Renderer::OnEnabledAudioTracksChanged(
std::move(change_completed_cb).Run();
}

void Renderer::SetPreservesPitch(bool preserves_pitch) {
// Not supported by most renderers.
}

} // namespace media
4 changes: 4 additions & 0 deletions media/base/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ class MEDIA_EXPORT Renderer {
// thresholds.
virtual void SetLatencyHint(base::Optional<base::TimeDelta> latency_hint) = 0;

// Sets whether pitch adjustment should be applied when the playback rate is
// different than 1.0.
virtual void SetPreservesPitch(bool preserves_pitch);

// The following functions must be called after Initialize().

// Discards any buffered data, executing |flush_cb| when completed.
Expand Down
5 changes: 5 additions & 0 deletions media/blink/webmediaplayer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,11 @@ void WebMediaPlayerImpl::SetLatencyHint(double seconds) {
pipeline_controller_->SetLatencyHint(latency_hint);
}

void WebMediaPlayerImpl::SetPreservesPitch(bool preserves_pitch) {
DCHECK(main_task_runner_->BelongsToCurrentThread());
pipeline_controller_->SetPreservesPitch(preserves_pitch);
}

void WebMediaPlayerImpl::OnRequestPictureInPicture() {
if (!surface_layer_for_video_enabled_)
ActivateSurfaceLayerForVideo();
Expand Down
1 change: 1 addition & 0 deletions media/blink/webmediaplayer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ class MEDIA_BLINK_EXPORT WebMediaPlayerImpl
void SetRate(double rate) override;
void SetVolume(double volume) override;
void SetLatencyHint(double seconds) override;
void SetPreservesPitch(bool preserves_pitch) override;
void OnRequestPictureInPicture() override;
void OnTimeUpdate() override;
void SetSinkId(
Expand Down
23 changes: 15 additions & 8 deletions media/filters/audio_renderer_algorithm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,15 @@ int AudioRendererAlgorithm::ResampleAndFill(AudioBus* dest,
base::Unretained(this)));
}

if (reached_end_of_stream_ && !audio_buffer_.frames()) {
// Previous calls to ResampleAndFill() and OnResamplerRead() have used all
// of the available buffers from |audio_buffer_|. Some valid input buffers
// might be stuck in |resampler_.BufferedFrames()|, but the rest is silence.
// Forgo the few remaining valid buffers, or else we will keep playing out
// silence forever and never trigger any "ended" events.
return 0;
}

// |resampler_| can request more than |requested_frames|, due to the
// requests size not being aligned. To prevent having to fill it with silence,
// we find the max number of reads it could request, and make sure we have
Expand Down Expand Up @@ -252,15 +261,9 @@ int AudioRendererAlgorithm::FillBuffer(AudioBus* dest,
return frames_read;
}

// WSOLA at playback rates that are close to 1.0 produces noticeable
// warbling and stuttering. We prefer resampling the audio at these speeds.
// This does results in a noticeable pitch shift.
// NOTE: The cutoff values are arbitrary, and picked based off of a tradeoff
// between "resample pitch shift" vs "WSOLA distortions".
if (kLowerResampleThreshold <= playback_rate &&
playback_rate <= kUpperResampleThreshold) {
// Use resampling when no pitch adjustments are needed.
if (!preserves_pitch_)
return ResampleAndFill(dest, dest_offset, requested_frames, playback_rate);
}

// Destroy the resampler if it was used before, but it's no longer needed
// (e.g. before playback rate has changed). This ensures that we don't try to
Expand Down Expand Up @@ -592,4 +595,8 @@ void AudioRendererAlgorithm::CreateSearchWrappers() {
AudioBus::WrapVector(search_block_->frames(), active_search_channels);
}

void AudioRendererAlgorithm::SetPreservesPitch(bool preserves_pitch) {
preserves_pitch_ = preserves_pitch;
}

} // namespace media
15 changes: 10 additions & 5 deletions media/filters/audio_renderer_algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,6 @@ class AudioBus;

class MEDIA_EXPORT AudioRendererAlgorithm {
public:
// Upper and lower bounds at which we prefer to use a resampler rather than
// WSOLA, to prevent audio artifacts.
static constexpr double kUpperResampleThreshold = 1.06;
static constexpr double kLowerResampleThreshold = 0.95;

AudioRendererAlgorithm(MediaLog* media_log);
AudioRendererAlgorithm(MediaLog* media_log,
AudioRendererAlgorithmParameters params);
Expand Down Expand Up @@ -90,6 +85,11 @@ class MEDIA_EXPORT AudioRendererAlgorithm {
// value of nullopt indicates the algorithm should restore the default value.
void SetLatencyHint(base::Optional<base::TimeDelta> latency_hint);

// Sets a flag indicating whether apply pitch adjustments when playing back
// at rates other than 1.0. Concretely, we use WSOLA when this is true, and
// resampling when this is false.
void SetPreservesPitch(bool preserves_pitch);

// Returns true if the |audio_buffer_| is >= |playback_threshold_|.
bool IsQueueAdequateForPlayback();

Expand Down Expand Up @@ -202,6 +202,11 @@ class MEDIA_EXPORT AudioRendererAlgorithm {
// start latency. See SetLatencyHint();
base::Optional<base::TimeDelta> latency_hint_;

// Whether to apply pitch adjusments or not when playing back at rates other
// than 1.0. In other words, we use WSOLA to preserve pitch when this is on,
// and resampling when this
bool preserves_pitch_ = true;

// How many frames to have in queue before beginning playback.
int64_t playback_threshold_;

Expand Down
41 changes: 20 additions & 21 deletions media/filters/audio_renderer_algorithm_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,9 @@ class AudioRendererAlgorithmTest : public testing::Test {
EXPECT_NEAR(playback_rate, actual_playback_rate, playback_rate / 100.0);
}

void TestPlaybackRateWithUnderflow(double playback_rate, bool end_of_stream) {
if (playback_rate > AudioRendererAlgorithm::kUpperResampleThreshold ||
playback_rate < AudioRendererAlgorithm::kLowerResampleThreshold) {
// This test is only used for the range in which we resample data instead
// of using WSOLA.
return;
}
void TestResamplingWithUnderflow(double playback_rate, bool end_of_stream) {
// We are only testing the behavior of the resampling case.
algorithm_.SetPreservesPitch(false);

if (end_of_stream) {
algorithm_.MarkEndOfStream();
Expand Down Expand Up @@ -452,13 +448,20 @@ TEST_F(AudioRendererAlgorithmTest, FillBuffer_NearlyNormalSlowerRate) {
// The range of playback rates in which we use resampling is [0.95, 1.06].
TEST_F(AudioRendererAlgorithmTest, FillBuffer_ResamplingRates) {
Initialize();
TestPlaybackRate(0.94); // WSOLA.
TestPlaybackRate(AudioRendererAlgorithm::kLowerResampleThreshold);
TestPlaybackRate(0.97);
// WSOLA.
TestPlaybackRate(0.50);
TestPlaybackRate(0.95);
TestPlaybackRate(1.00);
TestPlaybackRate(1.05);
TestPlaybackRate(2.00);

// Resampling.
algorithm_.SetPreservesPitch(false);
TestPlaybackRate(0.50);
TestPlaybackRate(0.95);
TestPlaybackRate(1.00);
TestPlaybackRate(1.04);
TestPlaybackRate(AudioRendererAlgorithm::kUpperResampleThreshold);
TestPlaybackRate(1.07); // WSOLA.
TestPlaybackRate(1.05);
TestPlaybackRate(2.00);
}

TEST_F(AudioRendererAlgorithmTest, FillBuffer_WithOffset) {
Expand All @@ -480,14 +483,10 @@ TEST_F(AudioRendererAlgorithmTest, FillBuffer_WithOffset) {

TEST_F(AudioRendererAlgorithmTest, FillBuffer_UnderFlow) {
Initialize();
TestPlaybackRateWithUnderflow(AudioRendererAlgorithm::kLowerResampleThreshold,
true);
TestPlaybackRateWithUnderflow(AudioRendererAlgorithm::kLowerResampleThreshold,
false);
TestPlaybackRateWithUnderflow(AudioRendererAlgorithm::kUpperResampleThreshold,
true);
TestPlaybackRateWithUnderflow(AudioRendererAlgorithm::kUpperResampleThreshold,
false);
TestResamplingWithUnderflow(0.75, true);
TestResamplingWithUnderflow(0.75, false);
TestResamplingWithUnderflow(1.25, true);
TestResamplingWithUnderflow(1.25, false);
}

TEST_F(AudioRendererAlgorithmTest, FillBuffer_OneAndAQuarterRate) {
Expand Down
4 changes: 4 additions & 0 deletions media/filters/pipeline_controller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,10 @@ void PipelineController::SetLatencyHint(
pipeline_->SetLatencyHint(latency_hint);
}

void PipelineController::SetPreservesPitch(bool preserves_pitch) {
pipeline_->SetPreservesPitch(preserves_pitch);
}

base::TimeDelta PipelineController::GetMediaTime() const {
return pipeline_->GetMediaTime();
}
Expand Down
1 change: 1 addition & 0 deletions media/filters/pipeline_controller.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ class MEDIA_EXPORT PipelineController {
float GetVolume() const;
void SetVolume(float volume);
void SetLatencyHint(base::Optional<base::TimeDelta> latency_hint);
void SetPreservesPitch(bool preserves_pitch);
base::TimeDelta GetMediaTime() const;
Ranges<base::TimeDelta> GetBufferedTimeRanges() const;
base::TimeDelta GetMediaDuration() const;
Expand Down
9 changes: 9 additions & 0 deletions media/filters/pipeline_controller_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -548,4 +548,13 @@ TEST_F(PipelineControllerTest, ResumePlaybackDuringSwitchingTracksState) {
PipelineController::State::SUSPENDED);
}

TEST_F(PipelineControllerTest, PreservesPitch) {
Complete(StartPipeline());
EXPECT_CALL(*pipeline_, SetPreservesPitch(false));
pipeline_controller_.SetPreservesPitch(false);

EXPECT_CALL(*pipeline_, SetPreservesPitch(true));
pipeline_controller_.SetPreservesPitch(true);
}

} // namespace media
2 changes: 2 additions & 0 deletions media/fuchsia/audio/fuchsia_audio_renderer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ void FuchsiaAudioRenderer::SetLatencyHint(
// shape and usefulness outside of fuchsia.
}

void FuchsiaAudioRenderer::SetPreservesPitch(bool preserves_pitch) {}

void FuchsiaAudioRenderer::StartTicking() {
DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

Expand Down
Loading

0 comments on commit dd0e3e6

Please sign in to comment.