
Commit

block: strict rq_affinity
Some systems benefit from completions always being steered to the strict
requester cpu rather than the looser "per-socket" steering that
blk_cpu_to_group() attempts by default. This is because the first
CPU in the group mask ends up being completely overloaded with work,
while the others (including the original submitter) have power left
to spare.

Allow the strict mode to be set by writing '2' to the sysfs control
file. This is identical to the scheme used for the nomerges file,
where '2' is a more aggressive setting than just being turned on.

echo 2 > /sys/block/<bdev>/queue/rq_affinity

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Roland Dreier <roland@purestorage.com>
Tested-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
djbw authored and Jens Axboe committed Jul 23, 2011
1 parent ef32308 commit 5757a6d
Showing 5 changed files with 27 additions and 16 deletions.
10 changes: 7 additions & 3 deletions Documentation/block/queue-sysfs.txt
@@ -45,9 +45,13 @@ device.
 
 rq_affinity (RW)
 ----------------
-If this option is enabled, the block layer will migrate request completions
-to the CPU that originally submitted the request. For some workloads
-this provides a significant reduction in CPU cycles due to caching effects.
+If this option is '1', the block layer will migrate request completions to the
+cpu "group" that originally submitted the request. For some workloads this
+provides a significant reduction in CPU cycles due to caching effects.
+
+For storage configurations that need to maximize distribution of completion
+processing setting this option to '2' forces the completion to run on the
+requesting cpu (bypassing the "group" aggregation logic).
 
 scheduler (RW)
 --------------
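For illustration, a minimal user-space sketch that selects a mode by writing the sysfs file documented above. The helper name and the device name "sda" are hypothetical, and writing the file requires root:

/* Sketch only: pick an rq_affinity mode for a block device via sysfs.
 * Assumptions: set_rq_affinity() and "sda" are illustrative names; the
 * sysfs path is the one from Documentation/block/queue-sysfs.txt above. */
#include <stdio.h>

static int set_rq_affinity(const char *bdev, int mode)
{
	char path[128];
	FILE *f;

	/* mode: 0 = off, 1 = per-group steering, 2 = strict requesting cpu */
	snprintf(path, sizeof(path), "/sys/block/%s/queue/rq_affinity", bdev);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", mode);
	return fclose(f);
}

int main(void)
{
	/* equivalent to: echo 2 > /sys/block/sda/queue/rq_affinity */
	return set_rq_affinity("sda", 2) ? 1 : 0;
}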
6 changes: 2 additions & 4 deletions block/blk-core.c
@@ -1279,10 +1279,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	init_request_from_bio(req, bio);
 
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
-	    bio_flagged(bio, BIO_CPU_AFFINE)) {
-		req->cpu = blk_cpu_to_group(get_cpu());
-		put_cpu();
-	}
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = smp_processor_id();
 
 	plug = current->plug;
 	if (plug) {
11 changes: 7 additions & 4 deletions block/blk-softirq.c
@@ -103,22 +103,25 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = {
 
 void __blk_complete_request(struct request *req)
 {
+	int ccpu, cpu, group_cpu = NR_CPUS;
 	struct request_queue *q = req->q;
 	unsigned long flags;
-	int ccpu, cpu, group_cpu;
 
 	BUG_ON(!q->softirq_done_fn);
 
 	local_irq_save(flags);
 	cpu = smp_processor_id();
-	group_cpu = blk_cpu_to_group(cpu);
 
 	/*
 	 * Select completion CPU
 	 */
-	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) {
 		ccpu = req->cpu;
-	else
+		if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) {
+			ccpu = blk_cpu_to_group(ccpu);
+			group_cpu = blk_cpu_to_group(cpu);
+		}
+	} else
 		ccpu = cpu;
 
 	if (ccpu == cpu || ccpu == group_cpu) {
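To make the selection logic above easier to follow, here is a stand-alone distillation; a sketch only, not kernel code. completion_cpu() and cpu_first_of_group() are hypothetical names, and the four-CPUs-per-group topology merely stands in for what blk_cpu_to_group() computes from the real CPU masks:

#include <stdbool.h>
#include <stdio.h>

#define GROUP_SIZE 4	/* assumed CPUs per "group" (e.g. per socket) */

/* Stand-in for blk_cpu_to_group(): the first CPU of the group mask. */
static int cpu_first_of_group(int cpu)
{
	return (cpu / GROUP_SIZE) * GROUP_SIZE;
}

/* Distilled completion-CPU choice after this patch (illustration only). */
static int completion_cpu(int req_cpu, int cur_cpu,
			  bool same_comp, bool same_force)
{
	int ccpu, group_cpu = -1;	/* -1 plays the role of NR_CPUS */

	if (same_comp && req_cpu != -1) {
		ccpu = req_cpu;
		if (!same_force) {	/* rq_affinity == 1 */
			ccpu = cpu_first_of_group(ccpu);
			group_cpu = cpu_first_of_group(cur_cpu);
		}
	} else {			/* rq_affinity == 0 */
		ccpu = cur_cpu;
	}

	/* Target is us, or our own group's lead CPU: complete locally.
	 * Otherwise the kernel raises the completion softirq on ccpu. */
	if (ccpu == cur_cpu || ccpu == group_cpu)
		return cur_cpu;
	return ccpu;
}

int main(void)
{
	/* request submitted on CPU 5, completion IRQ taken on CPU 1 */
	printf("rq_affinity=1 -> CPU %d\n", completion_cpu(5, 1, true, false));
	printf("rq_affinity=2 -> CPU %d\n", completion_cpu(5, 1, true, true));
	return 0;
}

Under these assumptions, rq_affinity=1 steers the completion to CPU 4, the group lead that the commit message describes as prone to overload, while rq_affinity=2 forces it back to CPU 5, the original submitter.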
13 changes: 9 additions & 4 deletions block/blk-sysfs.c
@@ -244,8 +244,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
 static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
 {
 	bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+	bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
 
-	return queue_var_show(set, page);
+	return queue_var_show(set << force, page);
 }
 
 static ssize_t
@@ -257,10 +258,14 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 
 	ret = queue_var_store(&val, page, count);
 	spin_lock_irq(q->queue_lock);
-	if (val)
+	if (val) {
 		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
-	else
-		queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+		if (val == 2)
+			queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
+	} else {
+		queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
+		queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
+	}
 	spin_unlock_irq(q->queue_lock);
 #endif
 	return ret;
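Note the encoding in the show path: because set and force are each 0 or 1, set << force maps the two flags back to the value that was stored (0, 1, or 2). A small self-check, as a sketch:

#include <assert.h>
#include <stdbool.h>

int main(void)
{
	bool set, force;

	set = false; force = false;	/* rq_affinity = 0: both flags clear */
	assert((set << force) == 0);
	set = true; force = false;	/* rq_affinity = 1: SAME_COMP only */
	assert((set << force) == 1);
	set = true; force = true;	/* rq_affinity = 2: SAME_COMP + SAME_FORCE */
	assert((set << force) == 2);
	return 0;
}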
3 changes: 2 additions & 1 deletion include/linux/blkdev.h
@@ -392,7 +392,7 @@ struct request_queue {
 #define QUEUE_FLAG_ELVSWITCH	6	/* don't use elevator, just do FIFO */
 #define QUEUE_FLAG_BIDI		7	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES	8	/* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP	9	/* force complete on same CPU */
+#define QUEUE_FLAG_SAME_COMP	9	/* complete on same CPU-group */
 #define QUEUE_FLAG_FAIL_IO	10	/* fake timeout */
 #define QUEUE_FLAG_STACKABLE	11	/* supports request stacking */
 #define QUEUE_FLAG_NONROT	12	/* non-rotational device (SSD) */
@@ -402,6 +402,7 @@ struct request_queue {
 #define QUEUE_FLAG_NOXMERGES	15	/* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM	16	/* Contributes to random pool */
 #define QUEUE_FLAG_SECDISCARD	17	/* supports SECDISCARD */
+#define QUEUE_FLAG_SAME_FORCE	18	/* force complete on same CPU */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
				 (1 << QUEUE_FLAG_STACKABLE)	|	\
