fix: prevent hang on gateway restart, fix delete_agent binding loss, add health check grace period
- Add 30s timeout to gateway restart command to prevent UI from hanging indefinitely
- Change delete_agent to reset bindings to "main" instead of removing them, preventing loss of channel binding entries
- Add health status grace period: show "Checking..." badge while gateway is restarting, retry every 2s up to 5 times before showing "Unhealthy"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1013,11 +1013,16 @@ pub fn delete_agent(agent_id: String) -> Result<bool, String> {
|
|||||||
return Err(format!("Agent '{}' not found", agent_id));
|
return Err(format!("Agent '{}' not found", agent_id));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also remove any bindings that reference this agent
|
// Reset any bindings that reference this agent back to "main" (default)
|
||||||
|
// so the channel doesn't lose its binding entry entirely.
|
||||||
if let Some(bindings) = cfg.pointer_mut("/bindings").and_then(Value::as_array_mut) {
|
if let Some(bindings) = cfg.pointer_mut("/bindings").and_then(Value::as_array_mut) {
|
||||||
bindings.retain(|b| {
|
for b in bindings.iter_mut() {
|
||||||
b.get("agentId").and_then(Value::as_str) != Some(&agent_id)
|
if b.get("agentId").and_then(Value::as_str) == Some(&agent_id) {
|
||||||
});
|
if let Some(obj) = b.as_object_mut() {
|
||||||
|
obj.insert("agentId".into(), Value::String("main".into()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
write_config_with_snapshot(&paths, &current, &cfg, "delete-agent")?;
|
write_config_with_snapshot(&paths, &current, &cfg, "delete-agent")?;
|
||||||
@@ -1402,26 +1407,77 @@ fn run_external_command_raw(parts: &[&str]) -> Result<OpenclawCommandOutput, Str
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn run_openclaw_raw(args: &[&str]) -> Result<OpenclawCommandOutput, String> {
|
fn run_openclaw_raw(args: &[&str]) -> Result<OpenclawCommandOutput, String> {
|
||||||
let mut command = Command::new("openclaw");
|
run_openclaw_raw_timeout(args, None)
|
||||||
command.args(args);
|
}
|
||||||
let output = command
|
|
||||||
.output()
|
fn run_openclaw_raw_timeout(args: &[&str], timeout_secs: Option<u64>) -> Result<OpenclawCommandOutput, String> {
|
||||||
|
let mut child = Command::new("openclaw")
|
||||||
|
.args(args)
|
||||||
|
.stdout(std::process::Stdio::piped())
|
||||||
|
.stderr(std::process::Stdio::piped())
|
||||||
|
.spawn()
|
||||||
.map_err(|error| format!("failed to run openclaw: {error}"))?;
|
.map_err(|error| format!("failed to run openclaw: {error}"))?;
|
||||||
let exit_code = output.status.code().unwrap_or(-1);
|
|
||||||
let result = OpenclawCommandOutput {
|
if let Some(secs) = timeout_secs {
|
||||||
stdout: String::from_utf8_lossy(&output.stdout).trim_end().to_string(),
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(secs);
|
||||||
stderr: String::from_utf8_lossy(&output.stderr).trim_end().to_string(),
|
loop {
|
||||||
exit_code,
|
match child.try_wait().map_err(|e| e.to_string())? {
|
||||||
};
|
Some(status) => {
|
||||||
if exit_code != 0 {
|
let mut stdout_buf = Vec::new();
|
||||||
let details = if !result.stderr.is_empty() {
|
let mut stderr_buf = Vec::new();
|
||||||
result.stderr.clone()
|
if let Some(mut out) = child.stdout.take() {
|
||||||
} else {
|
std::io::Read::read_to_end(&mut out, &mut stdout_buf).ok();
|
||||||
result.stdout.clone()
|
}
|
||||||
|
if let Some(mut err) = child.stderr.take() {
|
||||||
|
std::io::Read::read_to_end(&mut err, &mut stderr_buf).ok();
|
||||||
|
}
|
||||||
|
let exit_code = status.code().unwrap_or(-1);
|
||||||
|
let result = OpenclawCommandOutput {
|
||||||
|
stdout: String::from_utf8_lossy(&stdout_buf).trim_end().to_string(),
|
||||||
|
stderr: String::from_utf8_lossy(&stderr_buf).trim_end().to_string(),
|
||||||
|
exit_code,
|
||||||
|
};
|
||||||
|
if exit_code != 0 {
|
||||||
|
let details = if !result.stderr.is_empty() {
|
||||||
|
result.stderr.clone()
|
||||||
|
} else {
|
||||||
|
result.stdout.clone()
|
||||||
|
};
|
||||||
|
return Err(format!("openclaw command failed ({exit_code}): {details}"));
|
||||||
|
}
|
||||||
|
return Ok(result);
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
if std::time::Instant::now() >= deadline {
|
||||||
|
let _ = child.kill();
|
||||||
|
return Err(format!(
|
||||||
|
"Command timed out after {secs}s. The gateway may still be restarting in the background."
|
||||||
|
));
|
||||||
|
}
|
||||||
|
std::thread::sleep(std::time::Duration::from_millis(250));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let output = child
|
||||||
|
.wait_with_output()
|
||||||
|
.map_err(|error| format!("failed to run openclaw: {error}"))?;
|
||||||
|
let exit_code = output.status.code().unwrap_or(-1);
|
||||||
|
let result = OpenclawCommandOutput {
|
||||||
|
stdout: String::from_utf8_lossy(&output.stdout).trim_end().to_string(),
|
||||||
|
stderr: String::from_utf8_lossy(&output.stderr).trim_end().to_string(),
|
||||||
|
exit_code,
|
||||||
};
|
};
|
||||||
return Err(format!("openclaw command failed ({exit_code}): {details}"));
|
if exit_code != 0 {
|
||||||
|
let details = if !result.stderr.is_empty() {
|
||||||
|
result.stderr.clone()
|
||||||
|
} else {
|
||||||
|
result.stdout.clone()
|
||||||
|
};
|
||||||
|
return Err(format!("openclaw command failed ({exit_code}): {details}"));
|
||||||
|
}
|
||||||
|
Ok(result)
|
||||||
}
|
}
|
||||||
Ok(result)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Strip leading non-JSON lines from CLI output (plugin logs, ANSI codes, etc.)
|
/// Strip leading non-JSON lines from CLI output (plugin logs, ANSI codes, etc.)
|
||||||
@@ -3209,8 +3265,8 @@ pub async fn apply_pending_changes() -> Result<bool, String> {
|
|||||||
fs::create_dir_all(bp.parent().unwrap()).map_err(|e| e.to_string())?;
|
fs::create_dir_all(bp.parent().unwrap()).map_err(|e| e.to_string())?;
|
||||||
fs::write(&bp, &text).map_err(|e| e.to_string())?;
|
fs::write(&bp, &text).map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
// Restart gateway
|
// Restart gateway (30s timeout to prevent indefinite hang)
|
||||||
run_openclaw_raw(&["gateway", "restart"])?;
|
run_openclaw_raw_timeout(&["gateway", "restart"], Some(30))?;
|
||||||
Ok(true)
|
Ok(true)
|
||||||
}).await.map_err(|e| e.to_string())?
|
}).await.map_err(|e| e.to_string())?
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { useEffect, useMemo, useState } from "react";
|
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||||
import { api } from "../lib/api";
|
import { api } from "../lib/api";
|
||||||
import { Card, CardContent } from "@/components/ui/card";
|
import { Card, CardContent } from "@/components/ui/card";
|
||||||
import { Badge } from "@/components/ui/badge";
|
import { Badge } from "@/components/ui/badge";
|
||||||
@@ -76,11 +76,31 @@ export function Home({ onCook }: { onCook?: (recipeId: string, source?: string)
|
|||||||
const [creatingAgent, setCreatingAgent] = useState(false);
|
const [creatingAgent, setCreatingAgent] = useState(false);
|
||||||
const [createAgentError, setCreateAgentError] = useState("");
|
const [createAgentError, setCreateAgentError] = useState("");
|
||||||
|
|
||||||
// Fast calls: render immediately
|
// Health status with grace period: retry quickly when unhealthy, then slow-poll
|
||||||
useEffect(() => {
|
const [statusSettled, setStatusSettled] = useState(false);
|
||||||
api.getStatusLight().then(setStatus).catch(() => {});
|
const retriesRef = useRef(0);
|
||||||
|
|
||||||
|
const fetchStatus = useCallback(() => {
|
||||||
|
api.getStatusLight().then((s) => {
|
||||||
|
setStatus(s);
|
||||||
|
if (s.healthy) {
|
||||||
|
setStatusSettled(true);
|
||||||
|
retriesRef.current = 0;
|
||||||
|
} else if (retriesRef.current < 5) {
|
||||||
|
retriesRef.current++;
|
||||||
|
} else {
|
||||||
|
setStatusSettled(true);
|
||||||
|
}
|
||||||
|
}).catch(() => {});
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
fetchStatus();
|
||||||
|
// Poll fast (2s) while not settled, slow (10s) once settled
|
||||||
|
const interval = setInterval(fetchStatus, statusSettled ? 10000 : 2000);
|
||||||
|
return () => clearInterval(interval);
|
||||||
|
}, [fetchStatus, statusSettled]);
|
||||||
|
|
||||||
const refreshAgents = () => {
|
const refreshAgents = () => {
|
||||||
api.listAgentsOverview().then(setAgents).catch(() => {});
|
api.listAgentsOverview().then(setAgents).catch(() => {});
|
||||||
};
|
};
|
||||||
@@ -166,13 +186,13 @@ export function Home({ onCook }: { onCook?: (recipeId: string, source?: string)
|
|||||||
<CardContent className="grid grid-cols-[auto_1fr] gap-x-6 gap-y-3 items-center">
|
<CardContent className="grid grid-cols-[auto_1fr] gap-x-6 gap-y-3 items-center">
|
||||||
<span className="text-sm text-muted-foreground">Health</span>
|
<span className="text-sm text-muted-foreground">Health</span>
|
||||||
<span className="text-sm font-medium">
|
<span className="text-sm font-medium">
|
||||||
{status ? (
|
{!status ? "..." : status.healthy ? (
|
||||||
status.healthy ? (
|
<Badge className="bg-green-100 text-green-700 border-0">Healthy</Badge>
|
||||||
<Badge className="bg-green-100 text-green-700 border-0">Healthy</Badge>
|
) : !statusSettled ? (
|
||||||
) : (
|
<Badge className="bg-amber-100 text-amber-700 border-0">Checking...</Badge>
|
||||||
<Badge className="bg-red-100 text-red-700 border-0">Unhealthy</Badge>
|
) : (
|
||||||
)
|
<Badge className="bg-red-100 text-red-700 border-0">Unhealthy</Badge>
|
||||||
) : "..."}
|
)}
|
||||||
</span>
|
</span>
|
||||||
|
|
||||||
<span className="text-sm text-muted-foreground">Version</span>
|
<span className="text-sm text-muted-foreground">Version</span>
|
||||||
|
|||||||
Reference in New Issue
Block a user