Skip to content

Commit

Permalink
io: remove slab in favor of Arc and allocations (#5833)
Browse files Browse the repository at this point in the history
This patch removes the custom slab in favor of regular allocations and
`Arc`. Originally, the slab was used to be able to pass indexes as
tokens to the I/O driver when registering I/O resources. However, this
has the downside of having a more expensive token lookup path. It also
pins a `ScheduledIo` to a specific I/O driver. Additionally, the slab is
approaching custom allocator territory.

We plan to explore migrating I/O resources between I/O drivers. As a
step towards that, we need to decouple `ScheduledIo` from the I/O
driver. To do this, the patch uses plain-old allocation to allocate the
`ScheduledIo` and we use the pointer as the token. To use the token, we
need to be very careful about releasing the `ScheduledIo`. We need to
make sure that the associated I/O handle is deregistered from the I/O
driver **and** there are no polls. The strategy in this PR is to let the
I/O driver do the final release between polls, but I expect this
strategy to evolve over time.
  • Loading branch information
carllerche committed Jun 29, 2023
1 parent 0c7d8d1 commit b573adc
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 1,037 deletions.
168 changes: 58 additions & 110 deletions tokio/src/runtime/io/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ cfg_signal_internal_and_unix! {

use crate::io::interest::Interest;
use crate::io::ready::Ready;
use crate::loom::sync::Mutex;
use crate::runtime::driver;
use crate::runtime::io::{IoDriverMetrics, ScheduledIo};
use crate::util::slab::{self, Slab};
use crate::{loom::sync::RwLock, util::bit};
use crate::runtime::io::registration_set;
use crate::runtime::io::{IoDriverMetrics, RegistrationSet, ScheduledIo};

use mio::event::Source;
use std::fmt;
use std::io;
use std::sync::Arc;
use std::time::Duration;

/// I/O driver, backed by Mio.
Expand All @@ -26,10 +28,6 @@ pub(crate) struct Driver {
/// Reuse the `mio::Events` value across calls to poll.
events: mio::Events,

/// Primary slab handle containing the state for each resource registered
/// with this driver.
resources: Slab<ScheduledIo>,

/// The system event queue.
poll: mio::Poll,
}
Expand All @@ -39,8 +37,11 @@ pub(crate) struct Handle {
/// Registers I/O resources.
registry: mio::Registry,

/// Allocates `ScheduledIo` handles when creating new resources.
io_dispatch: RwLock<IoDispatcher>,
/// Tracks all registrations
registrations: RegistrationSet,

/// State that should be synchronized
synced: Mutex<registration_set::Synced>,

/// Used to wake up the reactor from a call to `turn`.
/// Not supported on Wasi due to lack of threading support.
Expand Down Expand Up @@ -69,11 +70,6 @@ cfg_net_unix!(
}
);

struct IoDispatcher {
allocator: slab::Allocator<ScheduledIo>,
is_shutdown: bool,
}

#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub(super) enum Direction {
Read,
Expand All @@ -85,20 +81,8 @@ pub(super) enum Tick {
Clear(u8),
}

// TODO: Don't use a fake token. Instead, reserve a slot entry for the wakeup
// token.
const TOKEN_WAKEUP: mio::Token = mio::Token(1 << 31);
const TOKEN_SIGNAL: mio::Token = mio::Token(1 + (1 << 31));

const ADDRESS: bit::Pack = bit::Pack::least_significant(24);

// Packs the generation value in the `readiness` field.
//
// The generation prevents a race condition where a slab slot is reused for a
// new socket while the I/O driver is about to apply a readiness event. The
// generation value is checked when setting new readiness. If the generations do
// not match, then the readiness event is discarded.
pub(super) const GENERATION: bit::Pack = ADDRESS.then(7);
const TOKEN_WAKEUP: mio::Token = mio::Token(0);
const TOKEN_SIGNAL: mio::Token = mio::Token(1);

fn _assert_kinds() {
fn _assert<T: Send + Sync>() {}
Expand All @@ -117,20 +101,19 @@ impl Driver {
let waker = mio::Waker::new(poll.registry(), TOKEN_WAKEUP)?;
let registry = poll.registry().try_clone()?;

let slab = Slab::new();
let allocator = slab.allocator();

let driver = Driver {
tick: 0,
signal_ready: false,
events: mio::Events::with_capacity(nevents),
poll,
resources: slab,
};

let (registrations, synced) = RegistrationSet::new();

let handle = Handle {
registry,
io_dispatch: RwLock::new(IoDispatcher::new(allocator)),
registrations,
synced: Mutex::new(synced),
#[cfg(not(tokio_wasi))]
waker,
metrics: IoDriverMetrics::default(),
Expand All @@ -151,25 +134,20 @@ impl Driver {

pub(crate) fn shutdown(&mut self, rt_handle: &driver::Handle) {
let handle = rt_handle.io();
let ios = handle.registrations.shutdown(&mut handle.synced.lock());

if handle.shutdown() {
self.resources.for_each(|io| {
// If a task is waiting on the I/O resource, notify it that the
// runtime is being shutdown. And shutdown will clear all wakers.
io.shutdown();
});
// `shutdown()` must be called without holding the lock.
for io in ios {
io.shutdown();
}
}

fn turn(&mut self, handle: &Handle, max_wait: Option<Duration>) {
// How often to call `compact()` on the resource slab
const COMPACT_INTERVAL: u8 = 255;
debug_assert!(!handle.registrations.is_shutdown(&handle.synced.lock()));

self.tick = self.tick.wrapping_add(1);

if self.tick == COMPACT_INTERVAL {
self.resources.compact()
}
handle.release_pending_registrations();

let events = &mut self.events;

Expand All @@ -196,35 +174,24 @@ impl Driver {
} else if token == TOKEN_SIGNAL {
self.signal_ready = true;
} else {
Self::dispatch(
&mut self.resources,
self.tick,
token,
Ready::from_mio(event),
);
ready_count += 1;
}
}

handle.metrics.incr_ready_count_by(ready_count);
}

fn dispatch(resources: &mut Slab<ScheduledIo>, tick: u8, token: mio::Token, ready: Ready) {
let addr = slab::Address::from_usize(ADDRESS.unpack(token.0));
let ready = Ready::from_mio(event);
// Use std::ptr::from_exposed_addr when stable
let ptr: *const ScheduledIo = token.0 as *const _;

let io = match resources.get(addr) {
Some(io) => io,
None => return,
};
// Safety: we ensure that the pointers used as tokens are not freed
// until they are both deregistered from mio **and** we know the I/O
// driver is not concurrently polling. The I/O driver holds ownership of
// an `Arc<ScheduledIo>` so we can safely cast this to a ref.
let io: &ScheduledIo = unsafe { &*ptr };

let res = io.set_readiness(Some(token.0), Tick::Set(tick), |curr| curr | ready);
io.set_readiness(Tick::Set(self.tick), |curr| curr | ready);
io.wake(ready);

if res.is_err() {
// token no longer valid!
return;
ready_count += 1;
}
}

io.wake(ready);
handle.metrics.incr_ready_count_by(ready_count);
}
}

Expand Down Expand Up @@ -256,52 +223,44 @@ impl Handle {
&self,
source: &mut impl mio::event::Source,
interest: Interest,
) -> io::Result<slab::Ref<ScheduledIo>> {
let (address, shared) = self.allocate()?;
) -> io::Result<Arc<ScheduledIo>> {
let scheduled_io = self.registrations.allocate(&mut self.synced.lock())?;
let token = scheduled_io.token();

let token = GENERATION.pack(shared.generation(), ADDRESS.pack(address.as_usize(), 0));

self.registry
.register(source, mio::Token(token), interest.to_mio())?;
// TODO: if this returns an err, the `ScheduledIo` leaks...
self.registry.register(source, token, interest.to_mio())?;

// TODO: move this logic to `RegistrationSet` and use a `CountedLinkedList`
self.metrics.incr_fd_count();

Ok(shared)
Ok(scheduled_io)
}

/// Deregisters an I/O resource from the reactor.
pub(super) fn deregister_source(&self, source: &mut impl mio::event::Source) -> io::Result<()> {
pub(super) fn deregister_source(
&self,
registration: &Arc<ScheduledIo>,
source: &mut impl Source,
) -> io::Result<()> {
// Deregister the source with the OS poller **first**
self.registry.deregister(source)?;

if self
.registrations
.deregister(&mut self.synced.lock(), registration)
{
self.unpark();
}

self.metrics.dec_fd_count();

Ok(())
}

/// shutdown the dispatcher.
fn shutdown(&self) -> bool {
let mut io = self.io_dispatch.write().unwrap();
if io.is_shutdown {
return false;
fn release_pending_registrations(&self) {
if self.registrations.needs_release() {
self.registrations.release(&mut self.synced.lock());
}
io.is_shutdown = true;
true
}

fn allocate(&self) -> io::Result<(slab::Address, slab::Ref<ScheduledIo>)> {
let io = self.io_dispatch.read().unwrap();
if io.is_shutdown {
return Err(io::Error::new(
io::ErrorKind::Other,
crate::util::error::RUNTIME_SHUTTING_DOWN_ERROR,
));
}
io.allocator.allocate().ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
"reactor at max registered I/O resources",
)
})
}
}

Expand All @@ -311,17 +270,6 @@ impl fmt::Debug for Handle {
}
}

// ===== impl IoDispatcher =====

impl IoDispatcher {
fn new(allocator: slab::Allocator<ScheduledIo>) -> Self {
Self {
allocator,
is_shutdown: false,
}
}
}

impl Direction {
pub(super) fn mask(self) -> Ready {
match self {
Expand Down
3 changes: 3 additions & 0 deletions tokio/src/runtime/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ pub(crate) use driver::{Driver, Handle, ReadyEvent};
mod registration;
pub(crate) use registration::Registration;

mod registration_set;
use registration_set::RegistrationSet;

mod scheduled_io;
use scheduled_io::ScheduledIo;

Expand Down
8 changes: 5 additions & 3 deletions tokio/src/runtime/io/registration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
use crate::io::interest::Interest;
use crate::runtime::io::{Direction, Handle, ReadyEvent, ScheduledIo};
use crate::runtime::scheduler;
use crate::util::slab;

use mio::event::Source;
use std::io;
use std::sync::Arc;
use std::task::{Context, Poll};

cfg_io_driver! {
Expand Down Expand Up @@ -45,10 +45,12 @@ cfg_io_driver! {
#[derive(Debug)]
pub(crate) struct Registration {
/// Handle to the associated runtime.
///
/// TODO: this can probably be moved into `ScheduledIo`.
handle: scheduler::Handle,

/// Reference to state stored by the driver.
shared: slab::Ref<ScheduledIo>,
shared: Arc<ScheduledIo>,
}
}

Expand Down Expand Up @@ -95,7 +97,7 @@ impl Registration {
///
/// `Err` is returned if an error is encountered.
pub(crate) fn deregister(&mut self, io: &mut impl Source) -> io::Result<()> {
self.handle().deregister_source(io)
self.handle().deregister_source(&self.shared, io)
}

pub(crate) fn clear_readiness(&self, event: ReadyEvent) {
Expand Down
Loading

0 comments on commit b573adc

Please sign in to comment.