diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index f40a233257d12..6bd83c725a528 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1570,7 +1570,7 @@ void OSDService::send_map(MOSDMap *m, Connection *con) } void OSDService::send_incremental_map(epoch_t since, Connection *con, - OSDMapRef& osdmap) + const OSDMapRef& osdmap) { epoch_t to = osdmap->get_epoch(); dout(10) << "send_incremental_map " << since << " -> " << to @@ -2007,7 +2007,7 @@ void OSDService::clear_sent_ready_to_merge() sent_ready_to_merge_source.clear(); } -void OSDService::prune_sent_ready_to_merge(OSDMapRef& osdmap) +void OSDService::prune_sent_ready_to_merge(const OSDMapRef& osdmap) { std::lock_guard l(merge_lock); auto i = sent_ready_to_merge_source.begin(); @@ -3029,6 +3029,7 @@ float OSD::get_osd_snap_trim_sleep() int OSD::init() { + OSDMapRef osdmap; CompatSet initial, diff; std::lock_guard lock(osd_lock); if (is_stopping()) @@ -3145,13 +3146,14 @@ int OSD::init() } // load up "current" osdmap - assert_warn(!osdmap); - if (osdmap) { + assert_warn(!get_osdmap()); + if (get_osdmap()) { derr << "OSD::init: unable to read current osdmap" << dendl; r = -EINVAL; goto out; } osdmap = get_map(superblock.current_epoch); + set_osdmap(osdmap); // make sure we don't have legacy pgs deleting { @@ -4090,9 +4092,9 @@ int OSD::shutdown() } // note unmount epoch - dout(10) << "noting clean unmount in epoch " << osdmap->get_epoch() << dendl; + dout(10) << "noting clean unmount in epoch " << get_osdmap_epoch() << dendl; superblock.mounted = service.get_boot_epoch(); - superblock.clean_thru = osdmap->get_epoch(); + superblock.clean_thru = get_osdmap_epoch(); ObjectStore::Transaction t; write_superblock(t); int r = store->queue_transaction(service.meta_ch, std::move(t)); @@ -4156,7 +4158,7 @@ int OSD::shutdown() osd_lock.Unlock(); map_lock.get_write(); - osdmap = OSDMapRef(); + set_osdmap(OSDMapRef()); map_lock.put_write(); for (auto s : shards) { @@ -4641,7 +4643,7 @@ void OSD::load_pgs() if (map_epoch > 0) { OSDMapRef pgosdmap = service.try_get_map(map_epoch); if (!pgosdmap) { - if (!osdmap->have_pg_pool(pgid.pool())) { + if (!get_osdmap()->have_pg_pool(pgid.pool())) { derr << __func__ << ": could not find map for epoch " << map_epoch << " on pg " << pgid << ", but the pool is not present in the " << "current map, so this is probably a result of bug 10617. " @@ -4657,7 +4659,7 @@ void OSD::load_pgs() } pg = _make_pg(pgosdmap, pgid); } else { - pg = _make_pg(osdmap, pgid); + pg = _make_pg(get_osdmap(), pgid); } if (!pg) { recursive_remove_collection(cct, store, pgid, *it); @@ -4857,7 +4859,7 @@ void OSD::resume_creating_pg() while (spare_pgs > 0 && pg != pending_creates_from_osd.cend()) { dout(20) << __func__ << " pg " << pg->first << dendl; vector acting; - osdmap->pg_to_up_acting_osds(pg->first, nullptr, nullptr, &acting, nullptr); + get_osdmap()->pg_to_up_acting_osds(pg->first, nullptr, nullptr, &acting, nullptr); service.queue_want_pg_temp(pg->first, twiddle(acting), true); pg = pending_creates_from_osd.erase(pg); do_sub_pg_creates = true; @@ -4875,7 +4877,7 @@ void OSD::resume_creating_pg() do_renew_subs = true; } } - version_t start = osdmap->get_epoch() + 1; + version_t start = get_osdmap_epoch() + 1; if (have_pending_creates) { // don't miss any new osdmap deleting PGs if (monc->sub_want("osdmap", start, 0)) { @@ -4924,7 +4926,7 @@ void OSD::build_initial_pg_history( pgid.pgid, &up, &up_primary, &acting, &acting_primary); ostringstream debug; - for (epoch_t e = created + 1; e <= osdmap->get_epoch(); ++e) { + for (epoch_t e = created + 1; e <= get_osdmap_epoch(); ++e) { OSDMapRef osdmap = service.get_map(e); int new_up_primary, new_acting_primary; vector new_up, new_acting; @@ -4990,7 +4992,7 @@ void OSD::_add_heartbeat_peer(int p) map::iterator i = heartbeat_peers.find(p); if (i == heartbeat_peers.end()) { - pair cons = service.get_con_osd_hb(p, osdmap->get_epoch()); + pair cons = service.get_con_osd_hb(p, get_osdmap_epoch()); if (!cons.first) return; hi = &heartbeat_peers[p]; @@ -5015,7 +5017,7 @@ void OSD::_add_heartbeat_peer(int p) } else { hi = &i->second; } - hi->epoch = osdmap->get_epoch(); + hi->epoch = get_osdmap_epoch(); } void OSD::_remove_heartbeat_peer(int n) @@ -5078,7 +5080,7 @@ void OSD::maybe_update_heartbeat_peers() _get_pgs(&pgs); for (auto& pg : pgs) { pg->with_heartbeat_peers([&](int peer) { - if (osdmap->is_up(peer)) { + if (get_osdmap()->is_up(peer)) { _add_heartbeat_peer(peer); } }); @@ -5087,10 +5089,10 @@ void OSD::maybe_update_heartbeat_peers() // include next and previous up osds to ensure we have a fully-connected set set want, extras; - const int next = osdmap->get_next_up_osd_after(whoami); + const int next = get_osdmap()->get_next_up_osd_after(whoami); if (next >= 0) want.insert(next); - int prev = osdmap->get_previous_up_osd_before(whoami); + int prev = get_osdmap()->get_previous_up_osd_before(whoami); if (prev >= 0 && prev != next) want.insert(prev); @@ -5098,7 +5100,7 @@ void OSD::maybe_update_heartbeat_peers() // subtree level (e.g., hosts) for fast failure detection. auto min_down = cct->_conf.get_val("mon_osd_min_down_reporters"); auto subtree = cct->_conf.get_val("mon_osd_reporter_subtree_level"); - osdmap->get_random_up_osds_by_subtree( + get_osdmap()->get_random_up_osds_by_subtree( whoami, subtree, min_down, want, &want); for (set::iterator p = want.begin(); p != want.end(); ++p) { @@ -5110,13 +5112,13 @@ void OSD::maybe_update_heartbeat_peers() // remove down peers; enumerate extras map::iterator p = heartbeat_peers.begin(); while (p != heartbeat_peers.end()) { - if (!osdmap->is_up(p->first)) { + if (!get_osdmap()->is_up(p->first)) { int o = p->first; ++p; _remove_heartbeat_peer(o); continue; } - if (p->second.epoch < osdmap->get_epoch()) { + if (p->second.epoch < get_osdmap_epoch()) { extras.insert(p->first); } ++p; @@ -5131,7 +5133,7 @@ void OSD::maybe_update_heartbeat_peers() extras.insert(n); _add_heartbeat_peer(n); } - n = osdmap->get_next_up_osd_after(n); + n = get_osdmap()->get_next_up_osd_after(n); if (n == next) break; // came full circle; stop } @@ -5627,7 +5629,7 @@ void OSD::heartbeat() if (now - last_mon_heartbeat > cct->_conf->osd_mon_heartbeat_interval && is_active()) { last_mon_heartbeat = now; dout(10) << "i have no heartbeat peers; checking mon for new map" << dendl; - osdmap_subscribe(osdmap->get_epoch() + 1, false); + osdmap_subscribe(get_osdmap_epoch() + 1, false); } } @@ -5702,7 +5704,7 @@ void OSD::tick() if (now - last_mon_heartbeat > cct->_conf->osd_mon_heartbeat_interval) { last_mon_heartbeat = now; dout(1) << __func__ << " checking mon for new map" << dendl; - osdmap_subscribe(osdmap->get_epoch() + 1, false); + osdmap_subscribe(get_osdmap_epoch() + 1, false); } } @@ -6210,6 +6212,7 @@ void OSD::_preboot(epoch_t oldest, epoch_t newest) heartbeat(); } + const auto osdmap = get_osdmap(); // if our map within recent history, try to add ourselves to the osdmap. if (osdmap->get_epoch() == 0) { derr << "waiting for initial osdmap" << dendl; @@ -6246,7 +6249,7 @@ void OSD::_preboot(epoch_t oldest, epoch_t newest) << dendl; osd_lock.Unlock(); for (auto shard : shards) { - shard->wait_min_pg_epoch(osdmap->get_epoch()); + shard->wait_min_pg_epoch(get_osdmap_epoch()); } osd_lock.Lock(); } @@ -6279,7 +6282,7 @@ void OSD::send_full_update() set s; OSDMap::calc_state_set(state, s); dout(10) << __func__ << " want state " << s << dendl; - monc->send_mon_message(new MOSDFull(osdmap->get_epoch(), state)); + monc->send_mon_message(new MOSDFull(get_osdmap_epoch(), state)); } void OSD::start_waiting_for_healthy() @@ -6289,7 +6292,7 @@ void OSD::start_waiting_for_healthy() last_heartbeat_resample = utime_t(); // subscribe to osdmap updates, in case our peers really are known to be dead - osdmap_subscribe(osdmap->get_epoch() + 1, false); + osdmap_subscribe(get_osdmap_epoch() + 1, false); } bool OSD::_is_healthy() @@ -6484,7 +6487,7 @@ void OSD::_collect_metadata(map *pm) void OSD::queue_want_up_thru(epoch_t want) { map_lock.get_read(); - epoch_t cur = osdmap->get_up_thru(whoami); + epoch_t cur = get_osdmap()->get_up_thru(whoami); std::lock_guard l(mon_report_lock); if (want > up_thru_wanted) { dout(10) << "queue_want_up_thru now " << want << " (was " << up_thru_wanted << ")" @@ -6503,6 +6506,7 @@ void OSD::queue_want_up_thru(epoch_t want) void OSD::send_alive() { ceph_assert(mon_report_lock.is_locked()); + const auto osdmap = get_osdmap(); if (!osdmap->exists(whoami)) return; epoch_t up_thru = osdmap->get_up_thru(whoami); @@ -6586,6 +6590,7 @@ void OSD::send_failures() ceph_assert(mon_report_lock.is_locked()); std::lock_guard l(heartbeat_lock); utime_t now = ceph_clock_now(); + const auto osdmap = get_osdmap(); while (!failure_queue.empty()) { int osd = failure_queue.begin()->first; if (!failure_pending.count(osd)) { @@ -6618,7 +6623,7 @@ void OSD::cancel_pending_failures() while (it != failure_pending.end()) { dout(10) << __func__ << " canceling in-flight failure report for osd." << it->first << dendl; - send_still_alive(osdmap->get_epoch(), it->first, it->second.second); + send_still_alive(get_osdmap_epoch(), it->first, it->second.second); failure_pending.erase(it++); } } @@ -6635,7 +6640,7 @@ void OSD::send_beacon(const ceph::coarse_mono_clock::time_point& now) MOSDBeacon* beacon = nullptr; { std::lock_guard l{min_last_epoch_clean_lock}; - beacon = new MOSDBeacon(osdmap->get_epoch(), min_last_epoch_clean); + beacon = new MOSDBeacon(get_osdmap_epoch(), min_last_epoch_clean); beacon->pgs = min_last_epoch_clean_pgs; last_sent_beacon = now; } @@ -6995,7 +7000,7 @@ int OSD::_do_command( } else { spg_t pcand; PGRef pg; - if (osdmap->get_primary_shard(pgid, &pcand) && + if (get_osdmap()->get_primary_shard(pgid, &pcand) && (pg = _lookup_lock_pg(pcand))) { if (pg->is_primary()) { // simulate pg cmd= for pg->do-command @@ -7018,7 +7023,7 @@ int OSD::_do_command( // send them the latest diff to ensure they realize the mapping // has changed. - service.send_incremental_map(osdmap->get_epoch() - 1, con, osdmap); + service.send_incremental_map(get_osdmap_epoch() - 1, con, get_osdmap()); // do not reply; they will get newer maps and realize they // need to resend. @@ -7715,7 +7720,7 @@ void OSD::_dispatch(Message *m) if (m->trace) op->osd_trace.init("osd op", &trace_endpoint, &m->trace); // no map? starting up? - if (!osdmap) { + if (!get_osdmap()) { dout(7) << "no OSDMap, not booted" << dendl; logger->inc(l_osd_waiting_for_map); waiting_for_osdmap.push_back(op); @@ -7751,7 +7756,7 @@ void OSD::handle_scrub(MOSDScrub *m) vector v; for (auto pgid : m->scrub_pgs) { spg_t pcand; - if (osdmap->get_primary_shard(pgid, &pcand) && + if (get_osdmap()->get_primary_shard(pgid, &pcand) && std::find(spgs.begin(), spgs.end(), pcand) != spgs.end()) { v.push_back(pcand); } @@ -8038,11 +8043,11 @@ MPGStats* OSD::collect_pg_stats() osd_stat_t cur_stat = service.get_osd_stat(); cur_stat.os_perf_stat = store->get_cur_stats(); - auto m = new MPGStats(monc->get_fsid(), osdmap->get_epoch(), had_for); + auto m = new MPGStats(monc->get_fsid(), get_osdmap_epoch(), had_for); m->osd_stat = cur_stat; std::lock_guard lec{min_last_epoch_clean_lock}; - min_last_epoch_clean = osdmap->get_epoch(); + min_last_epoch_clean = get_osdmap_epoch(); min_last_epoch_clean_pgs.clear(); std::set pool_set; @@ -8135,7 +8140,7 @@ void OSD::wait_for_new_map(OpRequestRef op) { // ask? if (waiting_for_osdmap.empty()) { - osdmap_subscribe(osdmap->get_epoch() + 1, false); + osdmap_subscribe(get_osdmap_epoch() + 1, false); } logger->inc(l_osd_waiting_for_map); @@ -8151,7 +8156,7 @@ void OSD::wait_for_new_map(OpRequestRef op) void OSD::note_down_osd(int peer) { ceph_assert(osd_lock.is_locked()); - cluster_messenger->mark_down_addrs(osdmap->get_cluster_addrs(peer)); + cluster_messenger->mark_down_addrs(get_osdmap()->get_cluster_addrs(peer)); heartbeat_lock.Lock(); failure_queue.erase(peer); @@ -8169,7 +8174,7 @@ void OSD::note_down_osd(int peer) void OSD::note_up_osd(int peer) { - service.forget_peer_epoch(peer, osdmap->get_epoch() - 1); + service.forget_peer_epoch(peer, get_osdmap_epoch() - 1); heartbeat_set_peers_need_update(); } @@ -8257,10 +8262,11 @@ void OSD::handle_osd_map(MOSDMap *m) osd_min = min; } } + epoch_t osdmap_epoch = get_osdmap_epoch(); if (osd_min > 0 && - osdmap->get_epoch() > max_lag && - osdmap->get_epoch() - max_lag > osd_min) { - epoch_t need = osdmap->get_epoch() - max_lag; + osdmap_epoch > max_lag && + osdmap_epoch - max_lag > osd_min) { + epoch_t need = osdmap_epoch - max_lag; dout(10) << __func__ << " waiting for pgs to catch up (need " << need << " max_lag " << max_lag << ")" << dendl; for (auto shard : shards) { @@ -8406,7 +8412,7 @@ void OSD::handle_osd_map(MOSDMap *m) inc.decode(p); if (o->apply_incremental(inc) < 0) { - derr << "ERROR: bad fsid? i have " << osdmap->get_fsid() << " and inc has " << inc.fsid << dendl; + derr << "ERROR: bad fsid? i have " << get_osdmap()->get_fsid() << " and inc has " << inc.fsid << dendl; ceph_abort_msg("bad fsid"); } @@ -8554,6 +8560,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m) bool do_shutdown = false; bool do_restart = false; bool network_error = false; + OSDMapRef osdmap; // advance through the new maps for (epoch_t cur = first; cur <= last; cur++) { @@ -8571,6 +8578,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m) // kill connections to newly down osds bool waited_for_reservations = false; set old; + osdmap = get_osdmap(); osdmap->get_all_osds(old); for (set::iterator p = old.begin(); p != old.end(); ++p) { if (*p != whoami && @@ -8602,7 +8610,8 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m) } } - osdmap = newmap; + osdmap = std::move(newmap); + set_osdmap(osdmap); epoch_t up_epoch; epoch_t boot_epoch; service.retrieve_epochs(&boot_epoch, &up_epoch, NULL); @@ -8810,6 +8819,7 @@ void OSD::check_osdmap_features() // current memory location, and setting or clearing bits in integer // fields, and we are the only writer, this is not a problem. + const auto osdmap = get_osdmap(); { Messenger::Policy p = client_messenger->get_default_policy(); uint64_t mask; @@ -9115,6 +9125,7 @@ bool OSD::advance_pg( void OSD::consume_map() { ceph_assert(osd_lock.is_locked()); + auto osdmap = get_osdmap(); dout(7) << "consume_map version " << osdmap->get_epoch() << dendl; /** make sure the cluster is speaking in SORTBITWISE, because we don't @@ -9232,6 +9243,7 @@ void OSD::consume_map() void OSD::activate_map() { ceph_assert(osd_lock.is_locked()); + auto osdmap = get_osdmap(); dout(7) << "activate_map version " << osdmap->get_epoch() << dendl; @@ -9309,7 +9321,7 @@ bool OSD::require_self_aliveness(const Message *m, epoch_t epoch) return true; } -bool OSD::require_same_peer_instance(const Message *m, OSDMapRef& map, +bool OSD::require_same_peer_instance(const Message *m, const OSDMapRef& map, bool is_fast_dispatch) { int from = m->get_source().num(); @@ -9348,6 +9360,7 @@ bool OSD::require_same_or_newer_map(OpRequestRef& op, epoch_t epoch, bool is_fast_dispatch) { const Message *m = op->get_req(); + const auto osdmap = get_osdmap(); dout(15) << "require_same_or_newer_map " << epoch << " (i am " << osdmap->get_epoch() << ") " << m << dendl; @@ -9454,6 +9467,7 @@ void OSD::handle_pg_create(OpRequestRef op) op->mark_started(); + const auto osdmap = get_osdmap(); map::const_iterator ci = m->ctimes.begin(); for (map::const_iterator p = m->mkpg.begin(); p != m->mkpg.end(); @@ -10485,9 +10499,9 @@ int OSD::init_op_flags(OpRequestRef& op) // check for ec base pool int64_t poolid = m->get_pg().pool(); - const pg_pool_t *pool = osdmap->get_pg_pool(poolid); + const pg_pool_t *pool = get_osdmap()->get_pg_pool(poolid); if (pool && pool->is_tier()) { - const pg_pool_t *base_pool = osdmap->get_pg_pool(pool->tier_of); + const pg_pool_t *base_pool = get_osdmap()->get_pg_pool(pool->tier_of); if (base_pool && base_pool->require_rollback()) { if ((iter->op.op != CEPH_OSD_OP_READ) && (iter->op.op != CEPH_OSD_OP_CHECKSUM) && @@ -10769,7 +10783,7 @@ epoch_t OSDShard::get_max_waiting_epoch() } void OSDShard::consume_map( - OSDMapRef& new_osdmap, + const OSDMapRef& new_osdmap, unsigned *pushes_to_free) { std::lock_guard l(shard_lock); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index c87deeb0eecb0..80ca9af538cd0 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -403,7 +403,7 @@ class OSDService { void forget_peer_epoch(int p, epoch_t e); void send_map(class MOSDMap *m, Connection *con); - void send_incremental_map(epoch_t since, Connection *con, OSDMapRef& osdmap); + void send_incremental_map(epoch_t since, Connection *con, const OSDMapRef& osdmap); MOSDMap *build_incremental_map_msg(epoch_t from, epoch_t to, OSDSuperblock& superblock); bool should_share_map(entity_name_t name, Connection *con, epoch_t epoch, @@ -730,7 +730,7 @@ class OSDService { void send_ready_to_merge(); void _send_ready_to_merge(); void clear_sent_ready_to_merge(); - void prune_sent_ready_to_merge(OSDMapRef& osdmap); + void prune_sent_ready_to_merge(const OSDMapRef& osdmap); // -- pg_temp -- private: @@ -1214,7 +1214,7 @@ struct OSDShard { /// push osdmap into shard void consume_map( - OSDMapRef& osdmap, + const OSDMapRef& osdmap, unsigned *pushes_to_free); void _wake_pg_slot(spg_t pgid, OSDShardPGSlot *slot); @@ -1514,8 +1514,7 @@ class OSD : public Dispatcher, i != sessions_to_check.end(); sessions_to_check.erase(i++)) { std::lock_guard l{(*i)->session_dispatch_lock}; - SessionRef session = *i; - dispatch_session_waiting(session, osdmap); + dispatch_session_waiting(*i, get_osdmap()); } } void session_handle_reset(SessionRef session) { @@ -1883,11 +1882,17 @@ class OSD : public Dispatcher, protected: // -- osd map -- - OSDMapRef osdmap; - OSDMapRef get_osdmap() { - return osdmap; + // TODO: switch to std::atomic when C++20 will be available. + OSDMapRef _osdmap; + void set_osdmap(OSDMapRef osdmap) { + std::atomic_store(&_osdmap, osdmap); + } + OSDMapRef get_osdmap() const { + return std::atomic_load(&_osdmap); } epoch_t get_osdmap_epoch() const { + // XXX: performance? + auto osdmap = get_osdmap(); return osdmap ? osdmap->get_epoch() : 0; } @@ -2096,7 +2101,7 @@ class OSD : public Dispatcher, * address as in the given map. * @pre op was sent by an OSD using the cluster messenger */ - bool require_same_peer_instance(const Message *m, OSDMapRef& map, + bool require_same_peer_instance(const Message *m, const OSDMapRef& map, bool is_fast_dispatch); bool require_same_or_newer_map(OpRequestRef& op, epoch_t e,