Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Async I/O (syscalls 245 - 249) #1584

Open
wants to merge 34 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
0b2b56a
Individually stub each AIO syscall
kmeisthax Sep 27, 2021
175c52c
Add a structure in the task for AIO contexts to live in.
kmeisthax Sep 27, 2021
9c4a3f8
Implement `io_setup`
kmeisthax Sep 28, 2021
241fe79
Implement `io_destroy`.
kmeisthax Sep 30, 2021
5f041a3
Store the owning PID of each AIO context, so that resolved requests c…
kmeisthax Oct 1, 2021
9912680
Elaborate on the event structure to be able to hold pending requests,…
kmeisthax Oct 1, 2021
cce2d45
Add method for submitting a pending event.
kmeisthax Oct 1, 2021
314f335
Add functions for retrieving pending events from a context.
kmeisthax Oct 3, 2021
5390cf5
Add AIO syscalls to systrace
kmeisthax Oct 3, 2021
0cf305b
Add aioctx table method for retrieving (and retaining) a context by i…
kmeisthax Oct 3, 2021
032d31d
Half-implement io_submit.
kmeisthax Oct 3, 2021
ecfeae1
Add `aioctx_cancel_event` & other documentation fixes
kmeisthax Oct 3, 2021
da5a606
Add io_submit op so that FDs can handle asynchronous operations.
kmeisthax Oct 3, 2021
a4560f5
Add method to complete pending events.
kmeisthax Oct 4, 2021
9510f30
Add fallback for filesystems that don't yet implement true async I/O.
kmeisthax Oct 4, 2021
1dd76fe
Submitted events must retain the guest address of the IOCB structure …
kmeisthax Oct 4, 2021
8638202
Implement completion polling (io_getevents)
kmeisthax Oct 4, 2021
1c9f39a
Everything except io_setup takes a context ID directly, not a pointer…
kmeisthax Oct 4, 2021
d678aad
Properly initialize the lock and refcount in AIO contexts.
kmeisthax Oct 10, 2021
940d53d
Fix a bunch of bounds checks
kmeisthax Oct 10, 2021
21bd634
Always NULL out contexts in the task context table when removing them…
kmeisthax Oct 10, 2021
b504474
Add E2E test for AIO read and write.
kmeisthax Oct 10, 2021
867e899
Don't specify an enum impl type because gcc doesn't support that C gi…
kmeisthax Oct 11, 2021
09b2e07
In fallback code, treat FDSYNC as a full FSYNC.
kmeisthax Oct 24, 2021
fc90511
Pull the pread/pwrite emulation code out into separate functions.
kmeisthax Oct 25, 2021
8ffda31
Add E2E test for vectored async IO
kmeisthax Oct 25, 2021
f270bc1
Implement vectored async IO fallback
kmeisthax Oct 25, 2021
9769c6e
Fix incorrect/spurious failure on seeking to non-zero offsets
kmeisthax Oct 25, 2021
0a8c063
Remove implicit coercions in tests so that they don't sign-extend thi…
kmeisthax Oct 31, 2021
e8d3c9f
Add a condition variable for waiting on events, and wait for it to be…
kmeisthax Nov 21, 2021
6ea8dce
Formatting & consistency nits
kmeisthax Nov 21, 2021
1174ec7
Rip out and replace the `AIO_IOCB_` family of constants with a host s…
kmeisthax Nov 21, 2021
399265e
Also rip out the `AIO_IO_EVENT` constants for another host-compiled s…
kmeisthax Nov 21, 2021
6c09ad5
Remove e2e tests dependency on `linux/aio_abi.h` header
kmeisthax Nov 21, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
307 changes: 307 additions & 0 deletions fs/aio.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,307 @@
#include "fs/aio.h"
#include "kernel/errno.h"
#include <limits.h>
#include <string.h>

// Grow the AIO context table so it can hold at least `newcap` entries.
//
// The caller must hold tbl->lock unless the table is known not to be
// shared yet (e.g. during task setup). Requests that would shrink the
// table are silently treated as success.
//
// Returns 0 on success, or _ENOMEM if the requested capacity would
// overflow or the new backing array cannot be allocated.
static int _aioctx_table_ensure(struct aioctx_table *tbl, unsigned int newcap) {
    if (tbl == NULL) return 0;
    if (tbl->capacity >= newcap) return 0;
    // Reject capacities whose byte size would not fit in an int.
    if ((INT_MAX / sizeof(struct aioctx*)) < newcap) return _ENOMEM;

    size_t new_bytes = sizeof(struct aioctx*) * newcap;
    struct aioctx **grown = malloc(new_bytes);
    if (grown == NULL) return _ENOMEM;

    // New slots start out empty; copy over whatever was there before.
    memset(grown, 0, new_bytes);
    if (tbl->contexts != NULL) {
        memcpy(grown, tbl->contexts, sizeof(struct aioctx*) * tbl->capacity);
        free(tbl->contexts);
    }

    tbl->contexts = grown;
    tbl->capacity = newcap;
    return 0;
}

// Allocate a fresh AIO context owned by the task with the given pid,
// with room for `events_capacity` simultaneous in-flight events.
//
// The returned context starts with a refcount of 1 and is marked as
// task-owned. Returns NULL if the capacity would overflow the event
// array size (a negative capacity also fails here, via the unsigned
// promotion in the comparison) or if allocation fails.
struct aioctx *aioctx_new(int events_capacity, pid_t pid) {
    if ((INT_MAX / sizeof(struct aioctx_event)) < events_capacity) return NULL;

    struct aioctx *ctx = malloc(sizeof(struct aioctx));
    if (ctx == NULL) return NULL;

    struct aioctx_event *events = malloc(sizeof(struct aioctx_event) * events_capacity);
    if (events == NULL) {
        free(ctx);
        return NULL;
    }
    // All slots begin in the AIOCTX_NONE (zero) state.
    memset(events, 0, sizeof(struct aioctx_event) * events_capacity);

    lock_init(&ctx->lock);
    cond_init(&ctx->cond);
    ctx->refcount = 1;
    ctx->events_capacity = events_capacity;
    ctx->events = events;
    ctx->is_owned_by_task = true;
    ctx->pid = pid;
    return ctx;
}

// Take an additional reference on the context. A NULL context is a no-op.
void aioctx_retain(struct aioctx *ctx) {
    if (ctx == NULL)
        return;

    lock(&ctx->lock);
    ctx->refcount++;
    unlock(&ctx->lock);
}

// Drop one reference from a context whose lock the caller already holds.
//
// If this was the last reference the context and its event array are
// freed outright (the held lock is freed along with the struct, so no
// unlock happens on that path); otherwise the lock is released. Either
// way, the caller must not touch the context afterwards.
static void _aioctx_decrement_ref(struct aioctx *ctx) {
    ctx->refcount--;
    if (ctx->refcount != 0) {
        unlock(&ctx->lock);
        return;
    }
    cond_destroy(&ctx->cond);
    free(ctx->events);
    free(ctx);
}

// Release one reference on the context, freeing it when the count hits
// zero. A NULL context is a no-op.
void aioctx_release(struct aioctx *ctx) {
    if (ctx != NULL) {
        lock(&ctx->lock);
        _aioctx_decrement_ref(ctx);
    }
}

// Release the owning task's reference on the context. This also clears
// the task-ownership flag so remaining holders know the task is gone.
// A NULL context is a no-op.
void aioctx_release_from_task(struct aioctx *ctx) {
    if (ctx != NULL) {
        lock(&ctx->lock);
        ctx->is_owned_by_task = false;
        _aioctx_decrement_ref(ctx);
    }
}

// Place a new pending event into the first free slot of the context.
//
// `user_data` and `iocbp` (the guest address of the IOCB) are stored
// alongside the pending payload so they can be reported back when the
// event completes.
//
// Returns the slot index on success, _EINVAL for a NULL context, or
// _EAGAIN when every slot is already occupied.
signed int aioctx_submit_pending_event(struct aioctx *ctx, uint64_t user_data, addr_t iocbp, struct aioctx_event_pending pending_data) {
    if (ctx == NULL) return _EINVAL;

    lock(&ctx->lock);

    signed int slot = _EAGAIN;
    for (int i = 0; i < ctx->events_capacity; i++) {
        struct aioctx_event *evt = &ctx->events[i];
        if (evt->tag != AIOCTX_NONE)
            continue;

        evt->tag = AIOCTX_PENDING;
        evt->user_data = user_data;
        evt->iocb_obj = iocbp;
        evt->data.as_pending = pending_data;
        slot = i;
        break;
    }

    unlock(&ctx->lock);
    return slot;
}

// Cancel a pending event, returning its slot to the free (AIOCTX_NONE)
// state. Out-of-range indices and slots that are not currently pending
// are ignored. A NULL context is a no-op.
void aioctx_cancel_event(struct aioctx *ctx, unsigned int index) {
    if (ctx == NULL) return;

    lock(&ctx->lock);

    // BUGFIX: the bounds check previously returned without releasing
    // ctx->lock, leaving the context permanently locked on a bad index.
    if (index < ctx->events_capacity && ctx->events[index].tag == AIOCTX_PENDING)
        ctx->events[index].tag = AIOCTX_NONE;

    unlock(&ctx->lock);
}

// Mark a pending event as complete, recording its two result values,
// and wake one waiter on the context's condition variable.
//
// Out-of-range indices are ignored; slots not in the PENDING state are
// left untouched (but waiters are still notified, matching the original
// behavior for valid indices). A NULL context is a no-op.
void aioctx_complete_event(struct aioctx *ctx, unsigned int index, int64_t result0, int64_t result1) {
    if (ctx == NULL) return;

    lock(&ctx->lock);

    // BUGFIX: the bounds check previously returned with ctx->lock still
    // held, deadlocking any later user of this context.
    if (index >= ctx->events_capacity) {
        unlock(&ctx->lock);
        return;
    }

    if (ctx->events[index].tag == AIOCTX_PENDING) {
        ctx->events[index].tag = AIOCTX_COMPLETE;

        struct aioctx_event_complete data;
        data.result[0] = result0;
        data.result[1] = result1;

        ctx->events[index].data.as_complete = data;
    }

    notify_once(&ctx->cond);
    unlock(&ctx->lock);
}

// Remove the first completed event from the context, copying its
// user data, guest IOCB address, and completion payload into the
// caller-provided out-parameters and freeing the slot.
//
// Returns true if an event was consumed, false if the context is NULL
// or no completed event exists. The out-parameters are only written on
// success.
bool aioctx_consume_completed_event(struct aioctx *ctx, uint64_t *user_data, addr_t *iocbp, struct aioctx_event_complete *completed_data) {
    if (ctx == NULL) return false;

    bool found = false;
    lock(&ctx->lock);

    for (int i = 0; i < ctx->events_capacity; i++) {
        struct aioctx_event *evt = &ctx->events[i];
        if (evt->tag != AIOCTX_COMPLETE)
            continue;

        *user_data = evt->user_data;
        *iocbp = evt->iocb_obj;
        *completed_data = evt->data.as_complete;

        evt->tag = AIOCTX_NONE;
        found = true;
        break;
    }

    unlock(&ctx->lock);
    return found;
}

// Block until a completion is signalled on this context or the timeout
// expires (a NULL timeout waits indefinitely, per the underlying
// wait_for primitive). Returns _EINVAL for a NULL context, otherwise
// whatever wait_for reports.
int aioctx_wait_for_completion(struct aioctx *ctx, struct timespec *timeout) {
    if (ctx == NULL) return _EINVAL;

    lock(&ctx->lock);
    int err = wait_for(&ctx->cond, &ctx->lock, timeout);
    unlock(&ctx->lock);
    return err;
}

// Acquire the context's lock on behalf of external callers (e.g. before
// aioctx_get_pending_event). A NULL context is a no-op.
void aioctx_lock(struct aioctx *ctx) {
    if (ctx != NULL)
        lock(&ctx->lock);
}

// Release a lock previously taken with aioctx_lock. A NULL context is a
// no-op.
void aioctx_unlock(struct aioctx *ctx) {
    if (ctx != NULL)
        unlock(&ctx->lock);
}

// Fetch a pointer to the pending payload of the event in the given
// slot, without consuming it.
//
// NOTE(review): this reads the event table without taking ctx->lock;
// callers presumably bracket it with aioctx_lock/aioctx_unlock — the
// returned pointer aliases the slot and is only safe while that lock is
// held. Confirm against call sites.
//
// Returns _EINVAL if the context is NULL, no longer task-owned, the
// index is out of range, or the slot is not pending; 0 on success.
// `event` may be NULL to merely probe for a pending slot.
signed int aioctx_get_pending_event(struct aioctx *ctx, unsigned int index, struct aioctx_event_pending **event) {
    if (ctx == NULL || !ctx->is_owned_by_task) return _EINVAL;
    if (index >= ctx->events_capacity) return _EINVAL;
    if (ctx->events[index].tag != AIOCTX_PENDING) return _EINVAL;

    if (event != NULL)
        *event = &ctx->events[index].data.as_pending;
    return 0;
}

// Initialize a caller-provided context table with the given starting
// capacity. Returns _EINVAL for a NULL table, _ENOMEM if the initial
// allocation fails (the table is then valid but empty), and 0 on
// success.
signed int aioctx_table_new(struct aioctx_table *tbl, unsigned int capacity) {
    if (tbl == NULL) return _EINVAL;

    tbl->capacity = 0;
    tbl->contexts = NULL;
    lock_init(&tbl->lock);

    // No lock needed yet: the table cannot be shared before it exists.
    return _aioctx_table_ensure(tbl, capacity);
}

// Tear down a context table: drop the task's reference on every stored
// context and free the backing array. The table struct itself is owned
// by the caller and is not freed. A NULL table is a no-op.
void aioctx_table_delete(struct aioctx_table *tbl) {
    if (tbl == NULL) return;

    lock(&tbl->lock);
    for (int i = 0; i < tbl->capacity; i++) {
        if (tbl->contexts[i] != NULL) {
            aioctx_release_from_task(tbl->contexts[i]);
        }
    }
    free(tbl->contexts);
    // Leave the table in a valid empty state rather than dangling.
    tbl->contexts = NULL;
    tbl->capacity = 0;
    // BUGFIX: the original returned with tbl->lock still held.
    unlock(&tbl->lock);
}

// Insert a context into the table, retaining it on behalf of the table.
//
// Reuses the first empty slot if one exists; otherwise grows the table
// (roughly doubling) and places the context in the first new slot.
//
// Returns the context's table index (its AIO context ID) on success,
// _EINVAL for NULL arguments, or _ENOMEM if the table cannot grow.
signed int aioctx_table_insert(struct aioctx_table *tbl, struct aioctx *ctx) {
    if (tbl == NULL) return _EINVAL;
    if (ctx == NULL) return _EINVAL;

    lock(&tbl->lock);

    for (int i = 0; i < tbl->capacity; i++) {
        if (tbl->contexts[i] == NULL) {
            tbl->contexts[i] = ctx;
            aioctx_retain(ctx);
            unlock(&tbl->lock);
            return i;
        }
    }

    // Every existing slot is full; grow the table.
    int old_capacity = tbl->capacity;
    if (((INT_MAX - 1) / 2) <= old_capacity) {
        // BUGFIX: previously returned _ENOMEM with tbl->lock still held.
        unlock(&tbl->lock);
        return _ENOMEM;
    }

    int err = _aioctx_table_ensure(tbl, (tbl->capacity * 2) + 1);
    if (err < 0) {
        // BUGFIX: previously returned the error with tbl->lock still held.
        unlock(&tbl->lock);
        return err;
    }

    tbl->contexts[old_capacity] = ctx;
    aioctx_retain(ctx);
    unlock(&tbl->lock);

    return old_capacity;
}

// Remove the context with the given ID from the table, dropping the
// table's (task) reference on it and clearing the slot.
//
// Returns 0 on success, or _EINVAL if the table is NULL, the ID is out
// of range, or the slot is already empty.
signed int aioctx_table_remove(struct aioctx_table *tbl, unsigned int ctx_id) {
    if (tbl == NULL) return _EINVAL;

    signed int err = _EINVAL;
    lock(&tbl->lock);

    if (ctx_id < tbl->capacity) {
        struct aioctx *ctx = tbl->contexts[ctx_id];
        if (ctx != NULL) {
            aioctx_release_from_task(ctx);
            tbl->contexts[ctx_id] = NULL;
            err = 0;
        }
    }

    unlock(&tbl->lock);
    return err;
}

// Look up a context by ID and take a reference on it for the caller,
// who must eventually balance it with aioctx_release.
//
// Returns NULL if the table is NULL, the ID is out of range, or the
// slot is empty.
struct aioctx *aioctx_table_get_and_retain(struct aioctx_table *tbl, unsigned int ctx_id) {
    if (tbl == NULL) return NULL;

    struct aioctx *ctx = NULL;
    lock(&tbl->lock);

    if (ctx_id < tbl->capacity) {
        ctx = tbl->contexts[ctx_id];
        if (ctx != NULL)
            aioctx_retain(ctx);
    }

    unlock(&tbl->lock);
    return ctx;
}