From 0ff90570d06f7471a15a0132ca9edce1833c329d Mon Sep 17 00:00:00 2001 From: "ye.zou" Date: Thu, 19 Feb 2026 23:58:02 +0800 Subject: [PATCH] [ceph]: filter stale watchers from disconnected hosts before VM start Resolves: ZSTAC-73476 Change-Id: Ifb12d3d457f4f1ff803f2540d20d3d2460bee2bc --- .../primary/CephPrimaryStorageFactory.java | 48 ++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageFactory.java b/plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageFactory.java index 682fc9dadf..801f50ff43 100755 --- a/plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageFactory.java +++ b/plugin/ceph/src/main/java/org/zstack/storage/ceph/primary/CephPrimaryStorageFactory.java @@ -1228,12 +1228,38 @@ public void run(MessageReply reply) { } GetVolumeWatchersReply rly = (GetVolumeWatchersReply)reply; - List watchers = rly.getWatchers(); + List watchers = rly.getWatchers(); if (watchers == null || watchers.isEmpty()) { completion.success(); return; } + // Filter out stale watchers from disconnected hosts (ZSTAC-73476) + // When libvirt hangs on a host, ceph rbd watchers become stale but persist, + // blocking VM start on other hosts. Only watchers from connected hosts are valid. 
+                Set<String> disconnectedHostIps = new HashSet<>(Q.New(HostVO.class)
+                        .select(HostVO_.managementIp)
+                        .notEq(HostVO_.status, HostStatus.Connected)
+                        .listValues());
+                if (!disconnectedHostIps.isEmpty()) {
+                    List<String> activeWatchers = new ArrayList<>();
+                    for (String watcher : watchers) {
+                        String watcherIp = extractWatcherIp(watcher);
+                        if (watcherIp != null && disconnectedHostIps.contains(watcherIp)) {
+                            logger.info(String.format("filtered stale watcher from disconnected host[ip:%s] for volume[uuid:%s]: %s",
+                                    watcherIp, msg.getVolumeUuid(), watcher));
+                            continue;
+                        }
+                        activeWatchers.add(watcher);
+                    }
+                    watchers = activeWatchers;
+                }
+
+                if (watchers.isEmpty()) {
+                    completion.success();
+                    return;
+                }
+
                 String installPath = Q.New(VolumeVO.class)
                         .eq(VolumeVO_.uuid, msg.getVolumeUuid())
                         .select(VolumeVO_.installPath)
@@ -1244,6 +1270,41 @@ public void run(MessageReply reply) {
         });
     }
 
+    /**
+     * Extracts the host address from an rbd watcher entry.
+     *
+     * Expected shape: {@code "watcher=ADDR:port/nonce client.ID cookie=COOKIE"}, where
+     * ADDR is a plain host/IPv4 literal or a bracketed IPv6 literal such as
+     * {@code [fe80::1]}.
+     *
+     * NOTE(review): the caller matches the result against HostVO.managementIp by exact
+     * string equality, so an address printed in another textual form will not match.
+     *
+     * @param watcher one watcher entry; may be {@code null}
+     * @return the address without port, nonce or brackets; {@code null} when the entry is
+     *         {@code null}, lacks the {@code watcher=} marker, or has an empty or
+     *         unterminated address
+     */
+    private String extractWatcherIp(String watcher) {
+        final String marker = "watcher=";
+        if (watcher == null) {
+            return null;
+        }
+        int markerIdx = watcher.indexOf(marker);
+        if (markerIdx < 0) {
+            return null;
+        }
+        String rest = watcher.substring(markerIdx + marker.length());
+        if (rest.startsWith("[")) {
+            // A bracketed IPv6 literal contains ':' itself, so the first colon is not
+            // the host/port separator; take everything up to the closing bracket.
+            int closeIdx = rest.indexOf(']');
+            return closeIdx > 1 ? rest.substring(1, closeIdx) : null;
+        }
+        int colonIdx = rest.indexOf(':');
+        return colonIdx > 0 ? rest.substring(0, colonIdx) : null;
+    }
+
     @Override
     public void preReleaseVmResource(VmInstanceSpec spec, Completion completion) {
         completion.success();