diff --git a/.agents/skills/openclaw-parallels-smoke/SKILL.md b/.agents/skills/openclaw-parallels-smoke/SKILL.md index b2007592f2f..b77f4933228 100644 --- a/.agents/skills/openclaw-parallels-smoke/SKILL.md +++ b/.agents/skills/openclaw-parallels-smoke/SKILL.md @@ -66,7 +66,8 @@ Use this skill for Parallels guest workflows and smoke interpretation. Do not lo - Windows global `npm install -g` phases can stay quiet for a minute or more even when healthy; inspect the phase log before calling it hung, and only treat it as a regression once the retry wrapper or timeout trips. - Fresh Windows ref-mode onboard should use the same background PowerShell runner plus done-file/log-drain pattern as the npm-update helper, including startup materialization checks, host-side timeouts on short poll `prlctl exec` calls, and retry-on-poll-failure behavior for transient transport flakes. - Fresh Windows ref-mode agent verification should set `OPENAI_API_KEY` in the PowerShell environment before invoking `openclaw.cmd agent`, for the same pairing-required fallback reason as macOS. -- The Windows upgrade smoke lane should restart the managed gateway after `upgrade.install-main` and before `upgrade.onboard-ref`, or the old process can keep the previous gateway token and fail `gateway-health` with `unauthorized: gateway token mismatch`. +- The standalone Windows upgrade smoke lane should stop the managed gateway after `upgrade.install-main` and before `upgrade.onboard-ref`. Restarting before onboard can leave the old process alive on the pre-onboard token while onboard rewrites `~/.openclaw/openclaw.json`, which then fails `gateway-health` with `unauthorized: gateway token mismatch`. +- If standalone Windows upgrade fails with a gateway token mismatch but `pnpm test:parallels:npm-update` passes, treat the mismatch as a standalone ref-onboard ordering bug first; the npm-update helper does not re-run ref-mode onboard on the same guest. 
- Keep onboarding and status output ASCII-clean in logs; fancy punctuation becomes mojibake in current capture paths. - If you hit an older run with `rc=255` plus an empty `fresh.install-main.log` or `upgrade.install-main.log`, treat it as a likely `prlctl exec` transport drop after guest start-up, not immediate proof of an npm/package failure. diff --git a/scripts/e2e/parallels-windows-smoke.sh b/scripts/e2e/parallels-windows-smoke.sh index b94fdb61ea5..624d7b2781f 100644 --- a/scripts/e2e/parallels-windows-smoke.sh +++ b/scripts/e2e/parallels-windows-smoke.sh @@ -984,12 +984,13 @@ verify_gateway() { guest_run_openclaw "" "" gateway status --deep --require-rpc } -restart_gateway() { +run_gateway_daemon_action() { + local action="$1" local runner_name log_name done_name done_status launcher_state local poll_rc state_rc log_rc start_seconds poll_deadline startup_checked - runner_name="openclaw-gateway-restart-$RANDOM-$RANDOM.ps1" - log_name="openclaw-gateway-restart-$RANDOM-$RANDOM.log" - done_name="openclaw-gateway-restart-$RANDOM-$RANDOM.done" + runner_name="openclaw-gateway-$action-$RANDOM-$RANDOM.ps1" + log_name="openclaw-gateway-$action-$RANDOM-$RANDOM.log" + done_name="openclaw-gateway-$action-$RANDOM-$RANDOM.done" start_seconds="$SECONDS" poll_deadline=$((SECONDS + TIMEOUT_GATEWAY_S + 60)) startup_checked=0 @@ -1006,7 +1007,7 @@ Remove-Item \$runner, \$log, \$done -Force -ErrorAction SilentlyContinue \$done = Join-Path \$env:TEMP '$done_name' try { \$openclaw = Join-Path \$env:APPDATA 'npm\openclaw.cmd' - & \$openclaw gateway restart *>&1 | Tee-Object -FilePath \$log -Append | Out-Null + & \$openclaw gateway $action *>&1 | Tee-Object -FilePath \$log -Append | Out-Null Set-Content -Path \$done -Value ([string]\$LASTEXITCODE) } catch { if (Test-Path \$log) { @@ -1030,9 +1031,9 @@ EOF set -e done_status="${done_status//$'\r'/}" if [[ $poll_rc -ne 0 ]]; then - warn "windows gateway restart helper poll failed; retrying" + warn "windows gateway $action helper poll 
failed; retrying" if (( SECONDS >= poll_deadline )); then - warn "windows gateway restart helper timed out while polling done file" + warn "windows gateway $action helper timed out while polling done file" return 1 fi sleep 2 @@ -1044,7 +1045,7 @@ EOF log_rc=$? set -e if [[ $log_rc -ne 0 ]]; then - warn "windows gateway restart helper log drain failed after completion" + warn "windows gateway $action helper log drain failed after completion" fi [[ "$done_status" == "0" ]] return $? @@ -1059,7 +1060,7 @@ EOF launcher_state="${launcher_state//$'\r'/}" startup_checked=1 if [[ $state_rc -eq 0 && "$launcher_state" == *"runner=False"* && "$launcher_state" == *"log=False"* && "$launcher_state" == *"done=False"* ]]; then - warn "windows gateway restart helper failed to materialize guest files" + warn "windows gateway $action helper failed to materialize guest files" return 1 fi fi @@ -1069,15 +1070,23 @@ EOF log_rc=$? set -e if [[ $log_rc -ne 0 ]]; then - warn "windows gateway restart helper log drain failed after timeout" + warn "windows gateway $action helper log drain failed after timeout" fi - warn "windows gateway restart helper timed out waiting for done file" + warn "windows gateway $action helper timed out waiting for done file" return 1 fi sleep 2 done } +restart_gateway() { + run_gateway_daemon_action restart +} + +stop_gateway() { + run_gateway_daemon_action stop +} + show_gateway_status_compat() { if guest_run_openclaw "" "" gateway status --help | grep -Fq -- "--require-rpc"; then guest_run_openclaw "" "" gateway status --deep --require-rpc @@ -1145,7 +1154,10 @@ run_upgrade_lane() { phase_run "upgrade.install-main" "$TIMEOUT_INSTALL_S" install_main_tgz "$host_ip" "openclaw-main-upgrade.tgz" || return $? UPGRADE_MAIN_VERSION="$(extract_last_version "$(phase_log_path upgrade.install-main)")" phase_run "upgrade.verify-main-version" "$TIMEOUT_VERIFY_S" verify_target_version || return $? 
- phase_run "upgrade.gateway-restart" "$TIMEOUT_GATEWAY_S" restart_gateway || return $? + # Stop the old managed gateway before ref-mode onboard rewrites config and + # gateway auth. Restarting first can leave the old token alive and make the + # onboard health probe fail against a stale daemon. + phase_run "upgrade.gateway-stop" "$TIMEOUT_GATEWAY_S" stop_gateway || return $? phase_run "upgrade.onboard-ref" "$TIMEOUT_ONBOARD_S" run_ref_onboard || return $? phase_run "upgrade.gateway-status" "$TIMEOUT_GATEWAY_S" verify_gateway || return $? UPGRADE_GATEWAY_STATUS="pass"