Skip to content

Commit

Permalink
Make the device clock members atomic
Browse files Browse the repository at this point in the history
Even though they're protected by a SeqLock of sorts, it's still undefined
behavior (UB) to read and write non-atomic variables from different threads.
Given the lock, though, relaxed reads and writes are sufficient, which helps
alleviate the cost.
  • Loading branch information
kcat committed Dec 3, 2023
1 parent e6bb912 commit 2c27d8b
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 25 deletions.
20 changes: 14 additions & 6 deletions alc/alc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -976,10 +976,18 @@ std::unique_ptr<Compressor> CreateDeviceLimiter(const ALCdevice *device, const f
*/
inline void UpdateClockBase(ALCdevice *device)
{
/* NOTE(review): this listing is a diff view without +/- markers. The four
 * statements below (through the second IncrementRef) look like the previous
 * implementation and everything after them like its replacement -- confirm
 * against the actual file before relying on this block as written.
 */
IncrementRef(device->MixCount);
device->ClockBase += nanoseconds{seconds{device->SamplesDone}} / device->Frequency;
device->SamplesDone = 0;
IncrementRef(device->MixCount);
/* Begin the update: store MixCount+1 with relaxed ordering, then issue a
 * release fence before the clock members are modified.
 */
const auto mixCount = device->MixCount.load(std::memory_order_relaxed);
device->MixCount.store(mixCount+1, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_release);

/* Read both clock members with relaxed loads. */
auto samplesDone = device->mSamplesDone.load(std::memory_order_relaxed);
auto clockBase = device->mClockBase.load(std::memory_order_relaxed);

/* Fold the pending sample count into the clock base (samples scaled to
 * nanoseconds, divided by the device frequency), then reset the sample
 * count to 0. Both stores are relaxed.
 */
clockBase += nanoseconds{seconds{samplesDone}} / device->Frequency;
device->mClockBase.store(clockBase, std::memory_order_relaxed);
device->mSamplesDone.store(0, std::memory_order_relaxed);

/* End the update: store MixCount+2 with release ordering. */
device->MixCount.store(mixCount+2, std::memory_order_release);
}

/**
Expand Down Expand Up @@ -2504,8 +2512,8 @@ ALC_API void ALC_APIENTRY alcGetInteger64vSOFT(ALCdevice *device, ALCenum pname,
nanoseconds basecount;
do {
refcount = dev->waitForMix();
basecount = dev->ClockBase;
samplecount = dev->SamplesDone;
basecount = dev->mClockBase.load(std::memory_order_relaxed);
samplecount = dev->mSamplesDone.load(std::memory_order_relaxed);
} while(refcount != ReadRef(dev->MixCount));
basecount += nanoseconds{seconds{samplecount}} / dev->Frequency;
*values = basecount.count();
Expand Down
29 changes: 20 additions & 9 deletions alc/alu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1910,8 +1910,9 @@ void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
{
ASSUME(SamplesToDo > 0);

const nanoseconds curtime{device->ClockBase +
nanoseconds{seconds{device->SamplesDone}}/device->Frequency};
const nanoseconds curtime{device->mClockBase.load(std::memory_order_relaxed) +
nanoseconds{seconds{device->mSamplesDone.load(std::memory_order_relaxed)}}/
device->Frequency};

for(ContextBase *ctx : *device->mContexts.load(std::memory_order_acquire))
{
Expand Down Expand Up @@ -2135,7 +2136,9 @@ uint DeviceBase::renderSamples(const uint numSamples)
buffer.fill(0.0f);

/* Increment the mix count at the start (lsb should now be 1). */
IncrementRef(MixCount);
const auto mixCount = MixCount.load(std::memory_order_relaxed);
MixCount.store(mixCount+1, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_release);

/* Process and mix each context's sources and effects. */
ProcessContexts(this, samplesToDo);
Expand All @@ -2144,12 +2147,16 @@ uint DeviceBase::renderSamples(const uint numSamples)
* and added to clock base so that large sample counts don't overflow
* during conversion. This also guarantees a stable conversion.
*/
SamplesDone += samplesToDo;
ClockBase += std::chrono::seconds{SamplesDone / Frequency};
SamplesDone %= Frequency;
{
auto samplesDone = mSamplesDone.load(std::memory_order_relaxed) + samplesToDo;
auto clockBase = mClockBase.load(std::memory_order_relaxed) +
std::chrono::seconds{samplesDone/Frequency};
mSamplesDone.store(samplesDone%Frequency, std::memory_order_relaxed);
mClockBase.store(clockBase, std::memory_order_relaxed);
}

/* Increment the mix count at the end (lsb should now be 0). */
IncrementRef(MixCount);
MixCount.store(mixCount+2, std::memory_order_release);

/* Apply any needed post-process for finalizing the Dry mix to the RealOut
* (Ambisonic decode, UHJ encode, etc).
Expand Down Expand Up @@ -2225,7 +2232,10 @@ void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const siz

void DeviceBase::handleDisconnect(const char *msg, ...)
{
IncrementRef(MixCount);
const auto mixCount = MixCount.load(std::memory_order_relaxed);
MixCount.store(mixCount+1, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_release);

if(Connected.exchange(false, std::memory_order_acq_rel))
{
AsyncEvent evt{std::in_place_type<AsyncDisconnectEvent>};
Expand Down Expand Up @@ -2267,5 +2277,6 @@ void DeviceBase::handleDisconnect(const char *msg, ...)
std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
}
}
IncrementRef(MixCount);

MixCount.store(mixCount+2, std::memory_order_release);
}
10 changes: 2 additions & 8 deletions alc/backends/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,8 @@ enum class BackendType {
/* Helper to get the current clock time from the device's ClockBase, and
* SamplesDone converted from the sample rate.
*
* NOTE(review): two definitions follow because this listing is a diff view
* without +/- markers. The first (taking a non-const DeviceBase*) appears to
* be the previous version and the second its replacement -- confirm against
* the actual file.
*/
inline std::chrono::nanoseconds GetDeviceClockTime(DeviceBase *device)
{
using std::chrono::seconds;
using std::chrono::nanoseconds;

auto ns = nanoseconds{seconds{device->SamplesDone}} / device->Frequency;
return device->ClockBase + ns;
}
/* Forwards to DeviceBase::getClockTime() and returns its result. */
inline std::chrono::nanoseconds GetDeviceClockTime(const DeviceBase *device) noexcept
{ return device->getClockTime(); }

/* Helper to get the device latency from the backend, including any fixed
* latency from post-processing.
Expand Down
3 changes: 3 additions & 0 deletions core/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include "mastering.h"


static_assert(std::atomic<std::chrono::nanoseconds>::is_always_lock_free);


al::FlexArray<ContextBase*> DeviceBase::sEmptyContextArray{0u};


Expand Down
18 changes: 16 additions & 2 deletions core/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,8 @@ struct DeviceBase {
*/
NfcFilter mNFCtrlFilter{};

uint SamplesDone{0u};
std::chrono::nanoseconds ClockBase{0};
std::atomic<uint> mSamplesDone{0u};
std::atomic<std::chrono::nanoseconds> mClockBase{std::chrono::nanoseconds{}};
std::chrono::nanoseconds FixedLatency{0};

AmbiRotateMatrix mAmbiRotateMatrix{};
Expand Down Expand Up @@ -307,6 +307,20 @@ struct DeviceBase {
return refcount;
}

/**
* Helper to get the current clock time: mClockBase plus mSamplesDone
* converted to nanoseconds using the device's Frequency. Both members are
* read with relaxed atomic loads. Should only be called while watching the
* MixCount.
*/
std::chrono::nanoseconds getClockTime() const noexcept
{
using std::chrono::seconds;
using std::chrono::nanoseconds;

/* Treat the sample count as a count of seconds to scale it to nanoseconds,
 * then divide by the sample rate to get the time those samples represent.
 */
auto ns = nanoseconds{seconds{mSamplesDone.load(std::memory_order_relaxed)}} / Frequency;
return mClockBase.load(std::memory_order_relaxed) + ns;
}

void ProcessHrtf(const size_t SamplesToDo);
void ProcessAmbiDec(const size_t SamplesToDo);
void ProcessAmbiDecStablized(const size_t SamplesToDo);
Expand Down

0 comments on commit 2c27d8b

Please sign in to comment.