From b64667dba568e1852b4b52105fe140a24807d258 Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Tue, 9 Jun 2026 08:39:31 +0800 Subject: [PATCH 1/2] redo destroy pg --- conanfile.py | 2 +- src/lib/homestore_backend/gc_manager.cpp | 11 +++-- src/lib/homestore_backend/hs_homeobject.hpp | 6 ++- src/lib/homestore_backend/hs_pg_manager.cpp | 44 +++++++++++++------ .../replication_state_machine.cpp | 16 ++++++- 5 files changed, 54 insertions(+), 25 deletions(-) diff --git a/conanfile.py b/conanfile.py index 17ec62ba..0e6905e3 100644 --- a/conanfile.py +++ b/conanfile.py @@ -10,7 +10,7 @@ class HomeObjectConan(ConanFile): name = "homeobject" - version = "4.1.18" + version = "4.1.19" homepage = "https://github.com/eBay/HomeObject" description = "Blob Store built on HomeStore" diff --git a/src/lib/homestore_backend/gc_manager.cpp b/src/lib/homestore_backend/gc_manager.cpp index df7baf34..c93eb43d 100644 --- a/src/lib/homestore_backend/gc_manager.cpp +++ b/src/lib/homestore_backend/gc_manager.cpp @@ -365,7 +365,7 @@ folly::SemiFuture< bool > GCManager::pdev_gc_actor::add_gc_task(uint8_t priority const auto pg_id = EXvchunk->m_pg_id.value(); m_hs_home_object->gc_manager()->incr_pg_pending_gc_task(pg_id); - if (!m_hs_home_object->can_chunks_in_pg_be_gc(pg_id)) { + if (!m_hs_home_object->is_pg_alive(pg_id)) { LOGDEBUGMOD(gcmgr, "chunk_id={} belongs to pg {}, which is not eligible for gc at this moment!", move_from_chunk, pg_id) m_hs_home_object->gc_manager()->decr_pg_pending_gc_task(pg_id); @@ -504,7 +504,7 @@ void GCManager::pdev_gc_actor::handle_recovered_gc_task( } // we have no gc_task_guard for recovered gc task, so we need to do this manually to make sure the gc task can be - // marked as completed and the pg can be marked as available for new gc task + // marked as completed on_gc_task_completed(priority, pg_id, move_from_chunk, move_to_chunk, vchunk_id, true, 0); GCLOGD(RECOVERD_GC_TASK_ID, pg_id, NO_SHARD_ID, @@ -797,9 +797,8 @@ bool GCManager::pdev_gc_actor::copy_valid_data( move_from_chunk); } - // check if all the pbas in the valid_blob_indexes are in move_from_chunk, if not, it means the - // shard is being modified during gc, we can not guarantee the data consistency, so we fail this gc - // task and let it be retried later. + // check if all the pbas in the valid_blob_indexes are in move_from_chunk, if not, we cancel this task and retry + // later. for (const auto& [blob, v] : valid_blob_indexes) { auto pba = v.pbas(); if (pba.chunk_num() != move_from_chunk) { @@ -1100,7 +1099,7 @@ bool GCManager::pdev_gc_actor::purge_reserved_chunk(chunk_id_t chunk, const uint RELEASE_ASSERT(!vchunk->m_pg_id.has_value(), "chunk_id={} is expected to be a reserved chunk, and not belong to a pg", chunk); RELEASE_ASSERT(vchunk->m_state == ChunkState::GC, - "chunk_id={} is a reserved chunk, expected to have a GC state, but actuall state is {} ", chunk, + "chunk_id={} is a reserved chunk, expected to have a GC state, but the actual state is {} ", chunk, vchunk->m_state); // Clear all rreqs on the reserved chunk BEFORE reset() resets its allocator. diff --git a/src/lib/homestore_backend/hs_homeobject.hpp b/src/lib/homestore_backend/hs_homeobject.hpp index 75c182b2..a74e1301 100644 --- a/src/lib/homestore_backend/hs_homeobject.hpp +++ b/src/lib/homestore_backend/hs_homeobject.hpp @@ -879,7 +879,9 @@ class HSHomeObject : public HomeObjectImpl { * * @param pg_id The ID of the PG to be destroyed. */ - bool pg_destroy(pg_id_t pg_id, bool need_to_pause_pg_state_machine = false); + bool pg_destroy(pg_id_t pg_id); + + void destroy_pg_resource(pg_id_t pg_id); bool pause_pg_state_machine(pg_id_t pg_id); @@ -977,7 +979,7 @@ class HSHomeObject : public HomeObjectImpl { * @param pg_id The ID of the PG whose shards are to be destroyed. * @return True if the chunks in the PG can be garbage collected, false otherwise. */ - bool can_chunks_in_pg_be_gc(pg_id_t pg_id) const; + bool is_pg_alive(pg_id_t pg_id) const; bool pg_exists(pg_id_t pg_id) const; diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp index 09c23ef9..eea449df 100644 --- a/src/lib/homestore_backend/hs_pg_manager.cpp +++ b/src/lib/homestore_backend/hs_pg_manager.cpp @@ -688,18 +688,7 @@ std::optional< pg_id_t > HSHomeObject::get_pg_id_with_group_id(group_id_t group_ void HSHomeObject::_destroy_pg(pg_id_t pg_id) { pg_destroy(pg_id); } -bool HSHomeObject::pg_destroy(pg_id_t pg_id, bool need_to_pause_pg_state_machine) { - if (need_to_pause_pg_state_machine && !pause_pg_state_machine(pg_id)) { - LOGI("Failed to pause pg state machine, pg_id={}", pg_id); - return false; - } - LOGI("Destroying pg={}", pg_id); - mark_pg_destroyed(pg_id); - - // we have the assumption that after pg is marked as destroyed, it will not be marked as alive again. - // TODO:: if this assumption is broken, we need to handle it. - gc_mgr_->drain_pg_pending_gc_task(pg_id); - +void HSHomeObject::destroy_pg_resource(pg_id_t pg_id) { destroy_shards(pg_id); destroy_hs_resources(pg_id); destroy_pg_index_table(pg_id); @@ -709,8 +698,35 @@ bool HSHomeObject::pg_destroy(pg_id_t pg_id, bool need_to_pause_pg_state_machine // which must be done after destroying pg super blk to avoid multiple pg use same chunks bool res = chunk_selector_->return_pg_chunks_to_dev_heap(pg_id); RELEASE_ASSERT(res, "Failed to return pg={} chunks to dev_heap", pg_id); + LOGI("resource of pg={} is destroyed", pg_id); +} - LOGI("pg={} is destroyed", pg_id); +bool HSHomeObject::pg_destroy(pg_id_t pg_id) { + auto hs_pg = const_cast< HS_PG* >(get_hs_pg(pg_id)); + RELEASE_ASSERT(hs_pg, "pg={} is null", pg_id); + auto repl_dev = hs_pg ? hs_pg->repl_dev_ : nullptr; + RELEASE_ASSERT(repl_dev, "repl_dev for pg={} is null", pg_id); + + // when reaching here, we will not receive any new log, both for BR or leave_group case. we wait for all logs to be + // committed and trigger a cp, so that when restart, no log will be replayed since cp_lsn is equal to + // last_append_lsn. + while (true) { + if (repl_dev->get_last_commit_lsn() == repl_dev->get_last_append_lsn()) { break; } + LOGI("Waiting for pg={} to be idle before destroying, last_append_lsn={}, last_commit_lsn={}", pg_id, + repl_dev->get_last_append_lsn(), repl_dev->get_last_commit_lsn()); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + + RELEASE_ASSERT(homestore::hs()->cp_mgr().trigger_cp_flush(true).get(), + "Failed to trigger checkpoint flush before destroying pg={}", pg_id); + + LOGI("Destroying pg={}", pg_id); + mark_pg_destroyed(pg_id); + + // we have the assumption that after pg is marked as destroyed, it will not be marked as alive again. + // TODO:: if this assumption is broken, we need to handle it. + gc_mgr_->drain_pg_pending_gc_task(pg_id); + destroy_pg_resource(pg_id); return true; } @@ -800,7 +816,7 @@ void HSHomeObject::mark_pg_destroyed(pg_id_t pg_id) { LOGD("pg={} is marked as destroyed", pg_id); } -bool HSHomeObject::can_chunks_in_pg_be_gc(pg_id_t pg_id) const { +bool HSHomeObject::is_pg_alive(pg_id_t pg_id) const { auto lg = std::scoped_lock(_pg_lock); auto hs_pg = const_cast< HS_PG* >(_get_hs_pg_unlocked(pg_id)); if (hs_pg == nullptr) { diff --git a/src/lib/homestore_backend/replication_state_machine.cpp b/src/lib/homestore_backend/replication_state_machine.cpp index 81ec3e6d..c97278cb 100644 --- a/src/lib/homestore_backend/replication_state_machine.cpp +++ b/src/lib/homestore_backend/replication_state_machine.cpp @@ -449,6 +449,10 @@ void ReplicationStateMachine::write_snapshot_obj(std::shared_ptr< homestore::sna set_snapshot_context(context); // Update the snapshot context in case apply_snapshot is not called auto hs_pg = home_object_->get_hs_pg(m_snp_rcv_handler->get_context_pg_id()); hs_pg->pg_state_.clear_state(PGStateMask::BASELINE_RESYNC); + // we only reset this if destroying pg happens in BR case. for other cases (on_destroy and _exit_pg), + // since this replica will leave the PG and no later logs will be received, no need to reset this. + reset_no_space_left_error_info(); + repl_dev()->reset_latch_lsn(); return; } @@ -499,7 +503,7 @@ void ReplicationStateMachine::write_snapshot_obj(std::shared_ptr< homestore::sna if (home_object_->pg_exists(pg_data->pg_id())) { LOGI("pg already exists, clean pg resources before snapshot, pg={} {}", pg_data->pg_id(), log_suffix); // Need to pause state machine before destroying the PG, if fail, let raft retry. - if (!home_object_->pg_destroy(pg_data->pg_id(), true /* pause state machine */)) { + if (!home_object_->pg_destroy(pg_data->pg_id())) { LOGE("failed to destroy existing pg, let raft retry, pg={} {}", pg_data->pg_id(), log_suffix); return; } @@ -1030,7 +1034,15 @@ void ReplicationStateMachine::on_log_replay_done(const homestore::group_id_t& gr const auto pg_id = pg_id_opt.value(); RELEASE_ASSERT(home_object_->pg_exists(pg_id), "pg={} should exist, but not! fatal error!", pg_id); - const auto& shards_in_pg = (const_cast< HSHomeObject::HS_PG* >(home_object_->_get_hs_pg_unlocked(pg_id)))->shards_; + const auto hs_pg = (const_cast< HSHomeObject::HS_PG* >(home_object_->get_hs_pg(pg_id))); + RELEASE_ASSERT(hs_pg, "Failed to get pg={} when log replay done", pg_id); + if (hs_pg->pg_sb_->state == PGState::DESTROYED) { + // pg resources were not cleaned up on the previous restart, clean them up now. + home_object_->destroy_pg_resource(pg_id); + return; + } + + const auto& shards_in_pg = hs_pg->shards_; auto chunk_selector = home_object_->chunk_selector(); for (const auto& shard_iter : shards_in_pg) { From 6c5e3149af82e2e7f4ee431baef13925cf4f2af1 Mon Sep 17 00:00:00 2001 From: Jie Yao Date: Tue, 9 Jun 2026 13:48:48 +0800 Subject: [PATCH 2/2] update --- src/lib/homestore_backend/hs_pg_manager.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/homestore_backend/hs_pg_manager.cpp b/src/lib/homestore_backend/hs_pg_manager.cpp index eea449df..96a48556 100644 --- a/src/lib/homestore_backend/hs_pg_manager.cpp +++ b/src/lib/homestore_backend/hs_pg_manager.cpp @@ -965,8 +965,7 @@ void HSHomeObject::on_pg_meta_blk_found(sisl::byte_view const& buf, void* meta_c hs_pg->index_table_ = it->second.index_table; it->second.pg_id = pg_id; } else { - RELEASE_ASSERT(hs_pg->pg_sb_->state == PGState::DESTROYED, "IndexTable should be recovered before PG"); - hs_pg->index_table_ = nullptr; + RELEASE_ASSERT(hs_pg->pg_sb_->state == PGState::DESTROYED, "IndexTable should be recovered for alive PG"); LOGI("Index table not found for destroyed pg={}, index_table_uuid={}", pg_id, uuid_str); }