Skip to content

Commit

Permalink
Merge tag 'for-5.19/io_uring-passthrough-2022-05-22' of git://git.ker…
Browse files Browse the repository at this point in the history
…nel.dk/linux-block

Pull io_uring NVMe command passthrough from Jens Axboe:
 "On top of everything else, this adds support for passthrough for
  io_uring.

  The initial feature for this is NVMe passthrough support, which allows
  non-filesystem based IO commands and admin commands.

  To support this, io_uring grows support for SQE and CQE members that
  are twice as big, allowing a full NVMe command to be passed in without
  having to copy data around, and allowing a request to complete with
  more than just a single 32-bit value as the output"

* tag 'for-5.19/io_uring-passthrough-2022-05-22' of git://git.kernel.dk/linux-block: (22 commits)
  io_uring: cleanup handling of the two task_work lists
  nvme: enable uring-passthrough for admin commands
  nvme: helper for uring-passthrough checks
  blk-mq: fix passthrough plugging
  nvme: add vectored-io support for uring-cmd
  nvme: wire-up uring-cmd support for io-passthru on char-device.
  nvme: refactor nvme_submit_user_cmd()
  block: wire-up support for passthrough plugging
  fs,io_uring: add infrastructure for uring-cmd
  io_uring: support CQE32 for nop operation
  io_uring: enable CQE32
  io_uring: support CQE32 in /proc info
  io_uring: add tracing for additional CQE32 fields
  io_uring: overflow processing for CQE32
  io_uring: flush completions for CQE32
  io_uring: modify io_get_cqe for CQE32
  io_uring: add CQE32 completion processing
  io_uring: add CQE32 setup processing
  io_uring: change ring size calculation for CQE32
  io_uring: store add. return values for CQE32
  ...
  • Loading branch information
torvalds committed May 23, 2022
2 parents e1a8fde + 3fe07bc commit 9836e93
Show file tree
Hide file tree
Showing 11 changed files with 806 additions and 138 deletions.
109 changes: 63 additions & 46 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,62 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
complete(waiting);
}

/*
 * Upper bound on the number of requests kept on a plug list.
 *
 * A plug that holds requests for more than one queue is allowed twice
 * BLK_MAX_REQUEST_COUNT entries; this matters for md arrays, which rely
 * on the larger batch to benefit from request merging.
 */
static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
{
	return plug->multiple_queues ? BLK_MAX_REQUEST_COUNT * 2 :
				       BLK_MAX_REQUEST_COUNT;
}

/*
 * Add @rq to the list of requests pending on @plug.
 *
 * When the plug already holds requests and either the count limit from
 * blk_plug_max_rq_count() has been reached, or merging is permitted on
 * rq->q and the request returned by rq_list_peek() is at least
 * BLK_PLUG_FLUSH_SIZE bytes, the plug list is flushed before @rq is added.
 *
 * Also records on the plug whether it now spans multiple queues and
 * whether any plugged request carries RQF_ELV.
 */
static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
{
	struct request *last = rq_list_peek(&plug->mq_list);

	if (!plug->rq_count) {
		trace_block_plug(rq->q);
	} else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
		   (!blk_queue_nomerges(rq->q) &&
		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
		blk_mq_flush_plug_list(plug, false);
		/*
		 * blk_mq_flush_plug_list() is expected to drain the plug
		 * list, so the request peeked above may no longer be valid.
		 * Forget it: dereferencing it below would touch a stale
		 * request, and with nothing left on the plug there is no
		 * mix of queues to detect anyway.
		 */
		last = NULL;
		trace_block_plug(rq->q);
	}

	if (!plug->multiple_queues && last && last->q != rq->q)
		plug->multiple_queues = true;
	if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
		plug->has_elevator = true;
	rq->rq_next = NULL;
	rq_list_add(&plug->mq_list, rq);
	plug->rq_count++;
}

/*
 * Common submission path for passthrough requests.
 *
 * Installs @done as the completion callback, starts I/O accounting, and
 * then either queues @rq on the current task's plug (only when @use_plug
 * is set and a plug is active) or hands it to the scheduler insert path,
 * honouring @at_head.
 */
static void __blk_execute_rq_nowait(struct request *rq, bool at_head,
				    rq_end_io_fn *done, bool use_plug)
{
	WARN_ON(irqs_disabled());
	WARN_ON(!blk_rq_is_passthrough(rq));

	rq->end_io = done;
	blk_account_io_start(rq);

	if (!use_plug || !current->plug) {
		/*
		 * No need to check the dying flag for MQ: the request
		 * won't be reused once the dying flag has been set.
		 */
		blk_mq_sched_insert_request(rq, at_head, true, false);
		return;
	}

	blk_add_rq_to_plug(current->plug, rq);
}


/**
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
* @rq: request to insert
Expand All @@ -1184,18 +1240,8 @@ static void blk_end_sync_rq(struct request *rq, blk_status_t error)
*/
void blk_execute_rq_nowait(struct request *rq, bool at_head, rq_end_io_fn *done)
{
	/*
	 * The sanity checks, end_io setup, accounting and insertion all
	 * live in the helper; doing any of them here as well would account
	 * and insert the request twice. use_plug is true so the helper may
	 * queue the request on current->plug when one is active.
	 */
	__blk_execute_rq_nowait(rq, at_head, done, true);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);

Expand Down Expand Up @@ -1233,8 +1279,13 @@ blk_status_t blk_execute_rq(struct request *rq, bool at_head)
DECLARE_COMPLETION_ONSTACK(wait);
unsigned long hang_check;

/*
* iopoll requires request to be submitted to driver, so can't
* use plug
*/
rq->end_io_data = &wait;
blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq);
__blk_execute_rq_nowait(rq, at_head, blk_end_sync_rq,
!blk_rq_is_poll(rq));

/* Prevent hang_check timer from firing at us during very long I/O */
hang_check = sysctl_hung_task_timeout_secs;
Expand Down Expand Up @@ -2676,40 +2727,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
hctx->queue->mq_ops->commit_rqs(hctx);
}

/*
 * Upper bound on the number of requests kept on a plug list.
 *
 * A plug that holds requests for more than one queue is allowed twice
 * BLK_MAX_REQUEST_COUNT entries; this matters for md arrays, which rely
 * on the larger batch to benefit from request merging.
 */
static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
{
	return plug->multiple_queues ? BLK_MAX_REQUEST_COUNT * 2 :
				       BLK_MAX_REQUEST_COUNT;
}

/*
 * Add @rq to the list of requests pending on @plug.
 *
 * When the plug already holds requests and either the count limit from
 * blk_plug_max_rq_count() has been reached, or merging is permitted on
 * rq->q and the request returned by rq_list_peek() is at least
 * BLK_PLUG_FLUSH_SIZE bytes, the plug list is flushed before @rq is added.
 *
 * Also records on the plug whether it now spans multiple queues and
 * whether any plugged request carries RQF_ELV.
 */
static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
{
	struct request *last = rq_list_peek(&plug->mq_list);

	if (!plug->rq_count) {
		trace_block_plug(rq->q);
	} else if (plug->rq_count >= blk_plug_max_rq_count(plug) ||
		   (!blk_queue_nomerges(rq->q) &&
		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
		blk_mq_flush_plug_list(plug, false);
		/*
		 * blk_mq_flush_plug_list() is expected to drain the plug
		 * list, so the request peeked above may no longer be valid.
		 * Forget it: dereferencing it below would touch a stale
		 * request, and with nothing left on the plug there is no
		 * mix of queues to detect anyway.
		 */
		last = NULL;
		trace_block_plug(rq->q);
	}

	if (!plug->multiple_queues && last && last->q != rq->q)
		plug->multiple_queues = true;
	if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
		plug->has_elevator = true;
	rq->rq_next = NULL;
	rq_list_add(&plug->mq_list, rq);
	plug->rq_count++;
}

static bool blk_mq_attempt_bio_merge(struct request_queue *q,
struct bio *bio, unsigned int nr_segs)
{
Expand Down
2 changes: 2 additions & 0 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -3146,6 +3146,7 @@ static const struct file_operations nvme_dev_fops = {
.release = nvme_dev_release,
.unlocked_ioctl = nvme_dev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.uring_cmd = nvme_dev_uring_cmd,
};

static ssize_t nvme_sysfs_reset(struct device *dev,
Expand Down Expand Up @@ -3699,6 +3700,7 @@ static const struct file_operations nvme_ns_chr_fops = {
.release = nvme_ns_chr_release,
.unlocked_ioctl = nvme_ns_chr_ioctl,
.compat_ioctl = compat_ptr_ioctl,
.uring_cmd = nvme_ns_chr_uring_cmd,
};

static int nvme_add_ns_cdev(struct nvme_ns *ns)
Expand Down
Loading

0 comments on commit 9836e93

Please sign in to comment.