From 5d81b64343394bc7b0057131d92bfd9bbd91bf3e Mon Sep 17 00:00:00 2001
From: scoootscooob <167050519+scoootscooob@users.noreply.github.com>
Date: Sat, 28 Mar 2026 22:20:49 -0700
Subject: [PATCH] fix(exec): fail closed when sandbox is unavailable and harden
 deny followups (#56800)

* fix(exec): fail closed when sandbox is unavailable and harden deny followups

* docs(changelog): note exec fail-closed fix
---
 CHANGELOG.md                                  |  1 +
 docs/gateway/security/index.md                |  8 ++--
 docs/tools/exec.md                            |  8 ++--
 .../bash-tools.exec-approval-followup.ts      | 22 +++++++++-
 .../bash-tools.exec.approval-id.test.ts       | 44 +++++++++++++++++++
 .../bash-tools.exec.background-abort.test.ts  |  1 +
 src/agents/bash-tools.exec.path.test.ts       | 21 +++------
 .../bash-tools.exec.pty-fallback.test.ts      |  7 ++-
 src/agents/bash-tools.exec.pty.test.ts        | 14 +++++-
 src/agents/bash-tools.exec.ts                 | 10 ++---
 .../bash-tools.process.send-keys.test.ts      |  2 +-
 src/agents/bash-tools.test.ts                 |  6 ++-
 src/agents/pi-tools-agent-config.test.ts      | 20 +++------
 src/security/audit.ts                         |  4 +-
 14 files changed, 116 insertions(+), 52 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8228a1985d1..980ac436422 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,7 @@ Docs: https://docs.openclaw.ai
 - Docker/setup: force BuildKit for local image builds (including sandbox image builds) so `./docker-setup.sh` no longer fails on `RUN --mount=...` when hosts default to Docker's legacy builder. (#56681) Thanks @zhanghui-china.
 - Control UI/agents: auto-load agent workspace files on initial Files panel open, and populate overview model/workspace/fallbacks from effective runtime agent metadata so defaulted models no longer show as `Not set`. (#56637) Thanks @dxsx84.
 - Control UI/slash commands: make `/steer` and `/redirect` work from the chat command palette with visible pending state for active-run `/steer`, correct redirected-run tracking, and a single canonical `/steer` entry in the command menu. (#54625) Thanks @fuller-stack-dev.
+- Exec: fail closed when the implicit sandbox host has no sandbox runtime, and stop denied async approval followups from reusing prior command output from the same session. (#56800) Thanks @scoootscooob.
 
 ## 2026.3.28
 
diff --git a/docs/gateway/security/index.md b/docs/gateway/security/index.md
index ec1902b4d5d..12739416c2b 100644
--- a/docs/gateway/security/index.md
+++ b/docs/gateway/security/index.md
@@ -191,7 +191,7 @@ If more than one person can DM your bot:
 - **Local disk hygiene** (permissions, symlinks, config includes, “synced folder” paths).
 - **Plugins** (extensions exist without an explicit allowlist).
 - **Policy drift/misconfig** (sandbox docker settings configured but sandbox mode off; ineffective `gateway.nodes.denyCommands` patterns because matching is exact command-name only (for example `system.run`) and does not inspect shell text; dangerous `gateway.nodes.allowCommands` entries; global `tools.profile="minimal"` overridden by per-agent profiles; extension plugin tools reachable under permissive tool policy).
-- **Runtime expectation drift** (for example `tools.exec.host="sandbox"` while sandbox mode is off, which runs directly on the gateway host).
+- **Runtime expectation drift** (for example `tools.exec.host="sandbox"` while sandbox mode is off, which now fails closed because no sandbox runtime is available).
 - **Model hygiene** (warn when configured models look legacy; not a hard block).
 
 If you run `--deep`, OpenClaw also attempts a best-effort live Gateway probe.
@@ -253,8 +253,8 @@ High-signal `checkId` values you will most likely see in real deployments (not e
 | `logging.redact_off`                                          | warn          | Sensitive values leak to logs/status                                                 | `logging.redactSensitive`                                                                            | yes      |
 | `sandbox.docker_config_mode_off`                              | warn          | Sandbox Docker config present but inactive                                           | `agents.*.sandbox.mode`                                                                              | no       |
 | `sandbox.dangerous_network_mode`                              | critical      | Sandbox Docker network uses `host` or `container:*` namespace-join mode              | `agents.*.sandbox.docker.network`                                                                    | no       |
-| `tools.exec.host_sandbox_no_sandbox_defaults`                 | warn          | `exec host=sandbox` resolves to host exec when sandbox is off                        | `tools.exec.host`, `agents.defaults.sandbox.mode`                                                    | no       |
-| `tools.exec.host_sandbox_no_sandbox_agents`                   | warn          | Per-agent `exec host=sandbox` resolves to host exec when sandbox is off              | `agents.list[].tools.exec.host`, `agents.list[].sandbox.mode`                                        | no       |
+| `tools.exec.host_sandbox_no_sandbox_defaults`                 | warn          | `exec host=sandbox` fails closed when sandbox is off                                 | `tools.exec.host`, `agents.defaults.sandbox.mode`                                                    | no       |
+| `tools.exec.host_sandbox_no_sandbox_agents`                   | warn          | Per-agent `exec host=sandbox` fails closed when sandbox is off                       | `agents.list[].tools.exec.host`, `agents.list[].sandbox.mode`                                        | no       |
 | `tools.exec.security_full_configured`                         | warn/critical | Host exec is running with `security="full"`                                          | `tools.exec.security`, `agents.list[].tools.exec.security`                                           | no       |
 | `tools.exec.auto_allow_skills_enabled`                        | warn          | Exec approvals trust skill bins implicitly                                           | `~/.openclaw/exec-approvals.json`                                                                    | no       |
 | `tools.exec.allowlist_interpreter_without_strict_inline_eval` | warn          | Interpreter allowlists permit inline eval without forced reapproval                  | `tools.exec.strictInlineEval`, `agents.list[].tools.exec.strictInlineEval`, exec approvals allowlist | no       |
@@ -534,7 +534,7 @@ Even with strong system prompts, **prompt injection is not solved**. System prom
 - Prefer mention gating in groups; avoid “always-on” bots in public rooms.
 - Treat links, attachments, and pasted instructions as hostile by default.
 - Run sensitive tool execution in a sandbox; keep secrets out of the agent’s reachable filesystem.
-- Note: sandboxing is opt-in. If sandbox mode is off, exec runs on the gateway host even though tools.exec.host defaults to sandbox, and host exec does not require approvals unless you set host=gateway and configure exec approvals.
+- Note: sandboxing is opt-in. If sandbox mode is off, `host=sandbox` fails closed even though tools.exec.host defaults to sandbox. To run on the gateway host, set `host=gateway` and configure exec approvals.
 - Limit high-risk tools (`exec`, `browser`, `web_fetch`, `web_search`) to trusted agents or explicit allowlists.
 - If you allowlist interpreters (`python`, `node`, `ruby`, `perl`, `php`, `lua`, `osascript`), enable `tools.exec.strictInlineEval` so inline eval forms still need explicit approval.
 - **Model choice matters:** older/smaller/legacy models are significantly less robust against prompt injection and tool misuse. For tool-enabled agents, use the strongest latest-generation, instruction-hardened model available.
diff --git a/docs/tools/exec.md b/docs/tools/exec.md
index 048eacedaf2..d983d82f439 100644
--- a/docs/tools/exec.md
+++ b/docs/tools/exec.md
@@ -30,7 +30,7 @@ Background sessions are scoped per agent; `process` only sees sessions from the
 Notes:
 
 - `host` defaults to `sandbox`.
-- `elevated` is ignored when sandboxing is off (exec already runs on the host).
+- `elevated` forces `host=gateway`; it is only available when elevated access is enabled for the current session/provider.
 - `gateway`/`node` approvals are controlled by `~/.openclaw/exec-approvals.json`.
 - `node` requires a paired node (companion app or headless node host).
 - If multiple nodes are available, set `exec.node` or `tools.exec.node` to select one.
@@ -41,9 +41,9 @@ Notes:
 - Host execution (`gateway`/`node`) rejects `env.PATH` and loader overrides (`LD_*`/`DYLD_*`) to
   prevent binary hijacking or injected code.
 - OpenClaw sets `OPENCLAW_SHELL=exec` in the spawned command environment (including PTY and sandbox execution) so shell/profile rules can detect exec-tool context.
-- Important: sandboxing is **off by default**. If sandboxing is off and `host=sandbox` is explicitly
-  configured/requested, exec now fails closed instead of silently running on the gateway host.
-  Enable sandboxing or use `host=gateway` with approvals.
+- Important: sandboxing is **off by default**. If sandboxing is off and exec resolves to
+  `host=sandbox` (including the implicit default), exec fails closed instead of silently
+  running on the gateway host. Enable sandboxing or use `host=gateway` with approvals.
 - Script preflight checks (for common Python/Node shell-syntax mistakes) only inspect files inside the
   effective `workdir` boundary. If a script path resolves outside `workdir`, preflight is skipped for
   that file.
diff --git a/src/agents/bash-tools.exec-approval-followup.ts b/src/agents/bash-tools.exec-approval-followup.ts
index af24f07fb50..0485681f4f3 100644
--- a/src/agents/bash-tools.exec-approval-followup.ts
+++ b/src/agents/bash-tools.exec-approval-followup.ts
@@ -10,13 +10,33 @@ type ExecApprovalFollowupParams = {
   resultText: string;
 };
 
+function buildExecDeniedFollowupPrompt(resultText: string): string {
+  return [
+    "An async command did not run.",
+    "Do not run the command again.",
+    "There is no new command output.",
+    "Do not mention, summarize, or reuse output from any earlier run in this session.",
+    "",
+    "Exact completion details:",
+    resultText.trim(),
+    "",
+    "Reply to the user in a helpful way.",
+    "Explain that the command did not run and why.",
+    "Do not claim there is new command output.",
+  ].join("\n");
+}
+
 export function buildExecApprovalFollowupPrompt(resultText: string): string {
+  const trimmed = resultText.trim();
+  if (trimmed.startsWith("Exec denied (")) {
+    return buildExecDeniedFollowupPrompt(trimmed);
+  }
   return [
     "An async command the user already approved has completed.",
     "Do not run the command again.",
     "",
     "Exact completion details:",
-    resultText.trim(),
+    trimmed,
     "",
     "Reply to the user in a helpful way.",
     "If it succeeded, share the relevant output.",
diff --git a/src/agents/bash-tools.exec.approval-id.test.ts b/src/agents/bash-tools.exec.approval-id.test.ts
index bc23a87ebac..56481d054ec 100644
--- a/src/agents/bash-tools.exec.approval-id.test.ts
+++ b/src/agents/bash-tools.exec.approval-id.test.ts
@@ -449,6 +449,50 @@ describe("exec approvals", () => {
     );
   });
 
+  it("uses a deny-specific followup prompt so prior output is not reused", async () => {
+    const agentCalls: Array<Record<string, unknown>> = [];
+
+    vi.mocked(callGatewayTool).mockImplementation(async (method, _opts, params) => {
+      if (method === "exec.approval.request") {
+        return acceptedApprovalResponse(params);
+      }
+      if (method === "exec.approval.waitDecision") {
+        return { decision: "deny" };
+      }
+      if (method === "agent") {
+        agentCalls.push(params as Record<string, unknown>);
+        return { status: "ok" };
+      }
+      return { ok: true };
+    });
+
+    const tool = createExecTool({
+      host: "gateway",
+      ask: "always",
+      approvalRunningNoticeMs: 0,
+      sessionKey: "agent:main:main",
+      elevated: { enabled: true, allowed: true, defaultLevel: "ask" },
+    });
+
+    const result = await tool.execute("call-gw-followup-deny", {
+      command: "echo ok",
+      workdir: process.cwd(),
+      gatewayUrl: undefined,
+      gatewayToken: undefined,
+    });
+
+    expect(result.details.status).toBe("approval-pending");
+    await expect.poll(() => agentCalls.length, { timeout: 3_000, interval: 20 }).toBe(1);
+    expect(typeof agentCalls[0]?.message).toBe("string");
+    expect(agentCalls[0]?.message).toContain("An async command did not run.");
+    expect(agentCalls[0]?.message).toContain(
+      "Do not mention, summarize, or reuse output from any earlier run in this session.",
+    );
+    expect(agentCalls[0]?.message).not.toContain(
+      "An async command the user already approved has completed.",
+    );
+  });
+
   it("requires a separate approval for each elevated command after allow-once", async () => {
     const requestCommands: string[] = [];
     const requestIds: string[] = [];
diff --git a/src/agents/bash-tools.exec.background-abort.test.ts b/src/agents/bash-tools.exec.background-abort.test.ts
index deec92b5697..c0b117a0195 100644
--- a/src/agents/bash-tools.exec.background-abort.test.ts
+++ b/src/agents/bash-tools.exec.background-abort.test.ts
@@ -14,6 +14,7 @@ const POLL_INTERVAL_MS = 15;
 const FINISHED_WAIT_TIMEOUT_MS = process.platform === "win32" ? 8_000 : 600;
 const BACKGROUND_TIMEOUT_SEC = process.platform === "win32" ? 0.2 : 0.05;
 const TEST_EXEC_DEFAULTS = {
+  host: "gateway" as const,
   security: "full" as const,
   ask: "off" as const,
 };
diff --git a/src/agents/bash-tools.exec.path.test.ts b/src/agents/bash-tools.exec.path.test.ts
index 97e56b24be7..204660ddc69 100644
--- a/src/agents/bash-tools.exec.path.test.ts
+++ b/src/agents/bash-tools.exec.path.test.ts
@@ -251,25 +251,14 @@ describe("exec host env validation", () => {
     }
   });
 
-  it("defaults to sandbox when sandbox runtime is unavailable", async () => {
+  it("fails closed when the implicit sandbox host has no sandbox runtime", async () => {
     const tool = createExecTool({ security: "full", ask: "off" });
 
-    const result = await tool.execute("call1", {
-      command: "echo ok",
-    });
-    const text = normalizeText(result.content.find((c) => c.type === "text")?.text);
-    expect(text).toContain("ok");
-
-    const err = await tool
-      .execute("call2", {
+    await expect(
+      tool.execute("call1", {
         command: "echo ok",
-        host: "gateway",
-      })
-      .then(() => null)
-      .catch((error: unknown) => (error instanceof Error ? error : new Error(String(error))));
-    expect(err).toBeTruthy();
-    expect(err?.message).toMatch(/exec host not allowed/);
-    expect(err?.message).toMatch(/tools\.exec\.host=sandbox/);
+      }),
+    ).rejects.toThrow(/sandbox runtime is unavailable/);
   });
 
   it("fails closed when sandbox host is explicitly configured without sandbox runtime", async () => {
diff --git a/src/agents/bash-tools.exec.pty-fallback.test.ts b/src/agents/bash-tools.exec.pty-fallback.test.ts
index 62e68653a07..76284af2205 100644
--- a/src/agents/bash-tools.exec.pty-fallback.test.ts
+++ b/src/agents/bash-tools.exec.pty-fallback.test.ts
@@ -16,7 +16,12 @@ afterEach(() => {
 });
 
 test("exec falls back when PTY spawn fails", async () => {
-  const tool = createExecTool({ allowBackground: false, security: "full", ask: "off" });
+  const tool = createExecTool({
+    allowBackground: false,
+    host: "gateway",
+    security: "full",
+    ask: "off",
+  });
   const result = await tool.execute("toolcall", {
     command: "printf ok",
     pty: true,
diff --git a/src/agents/bash-tools.exec.pty.test.ts b/src/agents/bash-tools.exec.pty.test.ts
index 10185f57282..a9f3cf47520 100644
--- a/src/agents/bash-tools.exec.pty.test.ts
+++ b/src/agents/bash-tools.exec.pty.test.ts
@@ -7,7 +7,12 @@ afterEach(() => {
 });
 
 test("exec supports pty output", async () => {
-  const tool = createExecTool({ allowBackground: false, security: "full", ask: "off" });
+  const tool = createExecTool({
+    allowBackground: false,
+    host: "gateway",
+    security: "full",
+    ask: "off",
+  });
   const result = await tool.execute("toolcall", {
     command: 'node -e "process.stdout.write(String.fromCharCode(111,107))"',
     pty: true,
@@ -19,7 +24,12 @@ test("exec supports pty output", async () => {
 });
 
 test("exec sets OPENCLAW_SHELL in pty mode", async () => {
-  const tool = createExecTool({ allowBackground: false, security: "full", ask: "off" });
+  const tool = createExecTool({
+    allowBackground: false,
+    host: "gateway",
+    security: "full",
+    ask: "off",
+  });
   const result = await tool.execute("toolcall-openclaw-shell", {
     command: "node -e \"process.stdout.write(process.env.OPENCLAW_SHELL || '')\"",
     pty: true,
diff --git a/src/agents/bash-tools.exec.ts b/src/agents/bash-tools.exec.ts
index 8e66745eae5..c563609e784 100644
--- a/src/agents/bash-tools.exec.ts
+++ b/src/agents/bash-tools.exec.ts
@@ -330,7 +330,6 @@ export function createExecTool(
         logInfo(`exec: elevated command ${truncateMiddle(params.command, 120)}`);
       }
       const configuredHost = defaults?.host ?? "sandbox";
-      const sandboxHostConfigured = defaults?.host === "sandbox";
       const requestedHost = normalizeExecHost(params.host) ?? null;
       let host: ExecHost = requestedHost ?? configuredHost;
       if (!elevatedRequested && requestedHost && requestedHost !== configuredHost) {
@@ -359,14 +358,11 @@ export function createExecTool(
       }
 
       const sandbox = host === "sandbox" ? defaults?.sandbox : undefined;
-      if (
-        host === "sandbox" &&
-        !sandbox &&
-        (sandboxHostConfigured || requestedHost === "sandbox")
-      ) {
+      // Never fall through to direct host exec when the selected host was sandbox.
+      if (host === "sandbox" && !sandbox) {
         throw new Error(
           [
-            "exec host=sandbox is configured, but sandbox runtime is unavailable for this session.",
+            "exec host resolved to sandbox, but sandbox runtime is unavailable for this session.",
             'Enable sandbox mode (`agents.defaults.sandbox.mode="non-main"` or `"all"`) or set tools.exec.host to "gateway"/"node".',
           ].join("\n"),
         );
diff --git a/src/agents/bash-tools.process.send-keys.test.ts b/src/agents/bash-tools.process.send-keys.test.ts
index 28e7b59d0b0..b6e5bef17d9 100644
--- a/src/agents/bash-tools.process.send-keys.test.ts
+++ b/src/agents/bash-tools.process.send-keys.test.ts
@@ -19,7 +19,7 @@ afterEach(() => {
 });
 
 async function startPtySession(command: string) {
-  const execTool = createExecTool({ security: "full", ask: "off" });
+  const execTool = createExecTool({ host: "gateway", security: "full", ask: "off" });
   const processTool = createProcessTool();
   const result = await execTool.execute("toolcall", {
     command,
diff --git a/src/agents/bash-tools.test.ts b/src/agents/bash-tools.test.ts
index 4929ea23be7..bd34c1ff457 100644
--- a/src/agents/bash-tools.test.ts
+++ b/src/agents/bash-tools.test.ts
@@ -44,7 +44,11 @@ const COMMAND_PRINT_PATH = isWin ? "Write-Output $env:PATH" : "echo $PATH";
 const COMMAND_EXIT_WITH_ERROR = "exit 1";
 const SCOPE_KEY_ALPHA = "agent:alpha";
 const SCOPE_KEY_BETA = "agent:beta";
-const TEST_EXEC_DEFAULTS = { security: "full" as const, ask: "off" as const };
+const TEST_EXEC_DEFAULTS = {
+  host: "gateway" as const,
+  security: "full" as const,
+  ask: "off" as const,
+};
 const DEFAULT_NOTIFY_SESSION_KEY = "agent:main:main";
 const ECHO_HI_COMMAND = shellEcho("hi");
 let callIdCounter = 0;
diff --git a/src/agents/pi-tools-agent-config.test.ts b/src/agents/pi-tools-agent-config.test.ts
index ff95e89b7ee..d4b8aff7ea4 100644
--- a/src/agents/pi-tools-agent-config.test.ts
+++ b/src/agents/pi-tools-agent-config.test.ts
@@ -633,6 +633,7 @@ describe("Agent-specific tool filtering", () => {
       tools: {
         deny: ["process"],
         exec: {
+          host: "gateway",
           security: "full",
           ask: "off",
         },
@@ -657,7 +658,7 @@ describe("Agent-specific tool filtering", () => {
     expect(resultDetails?.status).toBe("completed");
   });
 
-  it("keeps sandbox as the implicit exec host default without forcing gateway approvals", async () => {
+  it("fails closed when the implicit exec host resolves to sandbox without a runtime", async () => {
     const tools = createOpenClawCodingTools({
       config: {},
       sessionKey: "agent:main:main",
@@ -667,18 +668,11 @@ describe("Agent-specific tool filtering", () => {
     const execTool = tools.find((tool) => tool.name === "exec");
     expect(execTool).toBeDefined();
 
-    const result = await execTool!.execute("call-implicit-sandbox-default", {
-      command: "echo done",
-    });
-    const details = result?.details as { status?: string } | undefined;
-    expect(details?.status).toBe("completed");
-
     await expect(
-      execTool!.execute("call-implicit-sandbox-gateway", {
+      execTool!.execute("call-implicit-sandbox-default", {
         command: "echo done",
-        host: "gateway",
       }),
-    ).rejects.toThrow("exec host not allowed");
+    ).rejects.toThrow("sandbox runtime is unavailable");
   });
 
   it("fails closed when exec host=sandbox is requested without sandbox runtime", async () => {
@@ -695,7 +689,7 @@ describe("Agent-specific tool filtering", () => {
         command: "echo done",
         host: "sandbox",
       }),
-    ).rejects.toThrow("exec host=sandbox is configured");
+    ).rejects.toThrow("sandbox runtime is unavailable");
   });
 
   it("should apply agent-specific exec host defaults over global defaults", async () => {
@@ -738,14 +732,14 @@ describe("Agent-specific tool filtering", () => {
         command: "echo done",
         yieldMs: 1000,
       }),
-    ).rejects.toThrow("exec host=sandbox is configured");
+    ).rejects.toThrow("sandbox runtime is unavailable");
     await expect(
       helperExecTool!.execute("call-helper", {
         command: "echo done",
         host: "sandbox",
         yieldMs: 1000,
       }),
-    ).rejects.toThrow("exec host=sandbox is configured");
+    ).rejects.toThrow("sandbox runtime is unavailable");
   });
 
   it("applies explicit agentId exec defaults when sessionKey is opaque", async () => {
diff --git a/src/security/audit.ts b/src/security/audit.ts
index cab5e41653b..e5895c14ab5 100644
--- a/src/security/audit.ts
+++ b/src/security/audit.ts
@@ -907,7 +907,7 @@ function collectExecRuntimeFindings(cfg: OpenClawConfig): SecurityAuditFinding[]
       title: "Exec host is sandbox but sandbox mode is off",
       detail:
         "tools.exec.host is explicitly set to sandbox while agents.defaults.sandbox.mode=off. " +
-        "In this mode, exec runs directly on the gateway host.",
+        "In this mode, exec fails closed because no sandbox runtime is available.",
       remediation:
         'Enable sandbox mode (`agents.defaults.sandbox.mode="non-main"` or `"all"`) or set tools.exec.host to "gateway" with approvals.',
     });
@@ -933,7 +933,7 @@ function collectExecRuntimeFindings(cfg: OpenClawConfig): SecurityAuditFinding[]
       title: "Agent exec host uses sandbox while sandbox mode is off",
       detail:
         `agents.list.*.tools.exec.host is set to sandbox for: ${riskyAgents.join(", ")}. ` +
-        "With sandbox mode off, exec runs directly on the gateway host.",
+        "With sandbox mode off, exec fails closed for those agents.",
       remediation:
         'Enable sandbox mode for these agents (`agents.list[].sandbox.mode`) or set their tools.exec.host to "gateway".',
     });