Skip to content

Commit

Permalink
Merge tag 'md/4.10-rc6' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/shli/md

Pull MD fixes from Shaohua Li:
 "This fixes several corner cases for raid5 cache, which is merged into
  this cycle"

* tag 'md/4.10-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/r5cache: disable write back for degraded array
  md/r5cache: shift complex rmw from read path to write path
  md/r5cache: flush data only stripes in r5l_recovery_log()
  md/raid5: move comment of fetch_block to right location
  md/r5cache: read data into orig_page for prexor of cached data
  md/raid5-cache: delete meaningless code
  • Loading branch information
torvalds committed Jan 28, 2017
2 parents 64a172d + 2e38a37 commit dd55396
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 45 deletions.
5 changes: 5 additions & 0 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -5291,6 +5291,11 @@ int md_run(struct mddev *mddev)
if (start_readonly && mddev->ro == 0)
mddev->ro = 2; /* read-only, but switch on first write */

/*
* NOTE: some pers->run(), for example r5l_recovery_log(), wakes
* up mddev->thread. It is important to initialize critical
* resources for mddev->thread BEFORE calling pers->run().
*/
err = pers->run(mddev);
if (err)
pr_warn("md: pers->run() failed ...\n");
Expand Down
106 changes: 88 additions & 18 deletions drivers/md/raid5-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ struct r5l_log {

/* to submit async io_units, to fulfill ordering of flush */
struct work_struct deferred_io_work;
/* to disable write back during in degraded mode */
struct work_struct disable_writeback_work;
};

/*
Expand Down Expand Up @@ -611,6 +613,21 @@ static void r5l_submit_io_async(struct work_struct *work)
r5l_do_submit_io(log, io);
}

static void r5c_disable_writeback_async(struct work_struct *work)
{
struct r5l_log *log = container_of(work, struct r5l_log,
disable_writeback_work);
struct mddev *mddev = log->rdev->mddev;

if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
return;
pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
mdname(mddev));
mddev_suspend(mddev);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
mddev_resume(mddev);
}

static void r5l_submit_current_io(struct r5l_log *log)
{
struct r5l_io_unit *io = log->current_io;
Expand Down Expand Up @@ -1393,8 +1410,6 @@ static void r5l_do_reclaim(struct r5l_log *log)
next_checkpoint = r5c_calculate_new_cp(conf);
spin_unlock_irq(&log->io_list_lock);

BUG_ON(reclaimable < 0);

if (reclaimable == 0 || !write_super)
return;

Expand Down Expand Up @@ -2062,7 +2077,7 @@ static int
r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
struct r5l_recovery_ctx *ctx)
{
struct stripe_head *sh, *next;
struct stripe_head *sh;
struct mddev *mddev = log->rdev->mddev;
struct page *page;
sector_t next_checkpoint = MaxSector;
Expand All @@ -2076,7 +2091,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,

WARN_ON(list_empty(&ctx->cached_list));

list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
list_for_each_entry(sh, &ctx->cached_list, lru) {
struct r5l_meta_block *mb;
int i;
int offset;
Expand Down Expand Up @@ -2126,14 +2141,39 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
ctx->pos = write_pos;
ctx->seq += 1;
next_checkpoint = sh->log_start;
list_del_init(&sh->lru);
raid5_release_stripe(sh);
}
log->next_checkpoint = next_checkpoint;
__free_page(page);
return 0;
}

static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
struct r5l_recovery_ctx *ctx)
{
struct mddev *mddev = log->rdev->mddev;
struct r5conf *conf = mddev->private;
struct stripe_head *sh, *next;

if (ctx->data_only_stripes == 0)
return;

log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_BACK;

list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
r5c_make_stripe_write_out(sh);
set_bit(STRIPE_HANDLE, &sh->state);
list_del_init(&sh->lru);
raid5_release_stripe(sh);
}

md_wakeup_thread(conf->mddev->thread);
/* reuse conf->wait_for_quiescent in recovery */
wait_event(conf->wait_for_quiescent,
atomic_read(&conf->active_stripes) == 0);

log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
}

static int r5l_recovery_log(struct r5l_log *log)
{
struct mddev *mddev = log->rdev->mddev;
Expand All @@ -2160,32 +2200,31 @@ static int r5l_recovery_log(struct r5l_log *log)
pos = ctx.pos;
ctx.seq += 10000;

if (ctx.data_only_stripes == 0) {
log->next_checkpoint = ctx.pos;
r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
}

if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
pr_debug("md/raid:%s: starting from clean shutdown\n",
mdname(mddev));
else {
else
pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
mdname(mddev), ctx.data_only_stripes,
ctx.data_parity_stripes);

if (ctx.data_only_stripes > 0)
if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
mdname(mddev));
return -EIO;
}
if (ctx.data_only_stripes == 0) {
log->next_checkpoint = ctx.pos;
r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
} else if (r5c_recovery_rewrite_data_only_stripes(log, &ctx)) {
pr_err("md/raid:%s: failed to rewrite stripes to journal\n",
mdname(mddev));
return -EIO;
}

log->log_start = ctx.pos;
log->seq = ctx.seq;
log->last_checkpoint = pos;
r5l_write_super(log, pos);

r5c_recovery_flush_data_only_stripes(log, &ctx);
return 0;
}

Expand Down Expand Up @@ -2247,6 +2286,10 @@ static ssize_t r5c_journal_mode_store(struct mddev *mddev,
val > R5C_JOURNAL_MODE_WRITE_BACK)
return -EINVAL;

if (raid5_calc_degraded(conf) > 0 &&
val == R5C_JOURNAL_MODE_WRITE_BACK)
return -EINVAL;

mddev_suspend(mddev);
conf->log->r5c_journal_mode = val;
mddev_resume(mddev);
Expand Down Expand Up @@ -2301,6 +2344,16 @@ int r5c_try_caching_write(struct r5conf *conf,
set_bit(STRIPE_R5C_CACHING, &sh->state);
}

/*
* When run in degraded mode, array is set to write-through mode.
* This check helps drain pending write safely in the transition to
* write-through mode.
*/
if (s->failed) {
r5c_make_stripe_write_out(sh);
return -EAGAIN;
}

for (i = disks; i--; ) {
dev = &sh->dev[i];
/* if non-overwrite, use writing-out phase */
Expand Down Expand Up @@ -2351,6 +2404,8 @@ void r5c_release_extra_page(struct stripe_head *sh)
struct page *p = sh->dev[i].orig_page;

sh->dev[i].orig_page = sh->dev[i].page;
clear_bit(R5_OrigPageUPTDODATE, &sh->dev[i].flags);

if (!using_disk_info_extra_page)
put_page(p);
}
Expand Down Expand Up @@ -2555,6 +2610,19 @@ static int r5l_load_log(struct r5l_log *log)
return ret;
}

void r5c_update_on_rdev_error(struct mddev *mddev)
{
struct r5conf *conf = mddev->private;
struct r5l_log *log = conf->log;

if (!log)
return;

if (raid5_calc_degraded(conf) > 0 &&
conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
schedule_work(&log->disable_writeback_work);
}

int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{
struct request_queue *q = bdev_get_queue(rdev->bdev);
Expand Down Expand Up @@ -2627,6 +2695,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
spin_lock_init(&log->no_space_stripes_lock);

INIT_WORK(&log->deferred_io_work, r5l_submit_io_async);
INIT_WORK(&log->disable_writeback_work, r5c_disable_writeback_async);

log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
INIT_LIST_HEAD(&log->stripe_in_journal_list);
Expand Down Expand Up @@ -2659,6 +2728,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)

void r5l_exit_log(struct r5l_log *log)
{
flush_work(&log->disable_writeback_work);
md_unregister_thread(&log->reclaim_thread);
mempool_destroy(log->meta_pool);
bioset_free(log->bs);
Expand Down
Loading

0 comments on commit dd55396

Please sign in to comment.