feat: QR code scanning for gateway onboarding

iOS:
- QR scanner view using DataScannerViewController
- Photo library QR detection via CIDetector for saved QR images
- Deep link parser for openclaw://gateway URLs and base64url setup codes
- Onboarding wizard: full-screen welcome with "Scan QR Code" button,
  auto-connect on scan, back navigation, step indicators for manual flow

Backend:
- Add /pair qr action to device-pair extension for QR code generation
- TUI/WebUI differentiation: ASCII QR for TUI, markdown image for WebUI
- Telegram: send QR as media attachment via sendMessageTelegram
- Add data URI support to loadWebMedia for generic base64 media handling
- Export renderQrPngBase64 from plugin SDK for extension use

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Hongwei Ma 2026-02-14 21:47:33 +08:00
parent a08dab815e
commit d79ed65be0
8 changed files with 525 additions and 89 deletions

View File

@ -457,6 +457,8 @@ final class NodeAppModel {
switch route {
case let .agent(link):
await self.handleAgentDeepLink(link, originalURL: url)
case .gateway:
break
}
}

View File

@ -28,9 +28,11 @@ enum OnboardingStateStore {
return appModel.gatewayServerName == nil
}
static func markCompleted(mode: OnboardingConnectionMode, defaults: UserDefaults = .standard) {
static func markCompleted(mode: OnboardingConnectionMode? = nil, defaults: UserDefaults = .standard) {
defaults.set(true, forKey: Self.completedDefaultsKey)
defaults.set(mode.rawValue, forKey: Self.lastModeDefaultsKey)
if let mode {
defaults.set(mode.rawValue, forKey: Self.lastModeDefaultsKey)
}
defaults.set(Int(Date().timeIntervalSince1970), forKey: Self.lastSuccessTimeDefaultsKey)
}

View File

@ -1,4 +1,6 @@
import CoreImage
import OpenClawKit
import PhotosUI
import SwiftUI
import UIKit
@ -9,20 +11,34 @@ private enum OnboardingStep: Int, CaseIterable {
case auth
case success
var progressTitle: String {
var previous: Self? {
Self(rawValue: self.rawValue - 1)
}
var next: Self? {
Self(rawValue: self.rawValue + 1)
}
/// Progress label for the manual setup flow (mode connect auth success).
var manualProgressTitle: String {
let manualSteps: [OnboardingStep] = [.mode, .connect, .auth, .success]
guard let idx = manualSteps.firstIndex(of: self) else { return "" }
return "Step \(idx + 1) of \(manualSteps.count)"
}
var title: String {
switch self {
case .welcome:
"Step 1 of 5"
case .mode:
"Step 2 of 5"
case .connect:
"Step 3 of 5"
case .auth:
"Step 4 of 5"
case .success:
"Step 5 of 5"
case .welcome: "Welcome"
case .mode: "Connection Mode"
case .connect: "Connect"
case .auth: "Authentication"
case .success: "Connected"
}
}
var canGoBack: Bool {
self != .welcome && self != .success
}
}
struct OnboardingWizardView: View {
@ -43,41 +59,138 @@ struct OnboardingWizardView: View {
@State private var issue: GatewayConnectionIssue = .none
@State private var didMarkCompleted = false
@State private var discoveryRestartTask: Task<Void, Never>?
@State private var showQRScanner: Bool = false
@State private var scannerError: String?
@State private var selectedPhoto: PhotosPickerItem?
let allowSkip: Bool
let onClose: () -> Void
private var isFullScreenStep: Bool {
self.step == .welcome || self.step == .success
}
var body: some View {
NavigationStack {
Form {
Section {
Text(self.step.progressTitle)
.font(.footnote.weight(.semibold))
.foregroundStyle(.secondary)
}
Group {
switch self.step {
case .welcome:
self.welcomeStep
case .mode:
self.modeStep
case .connect:
self.connectStep
case .auth:
self.authStep
case .success:
self.successStep
default:
Form {
switch self.step {
case .mode:
self.modeStep
case .connect:
self.connectStep
case .auth:
self.authStep
default:
EmptyView()
}
}
.scrollDismissesKeyboard(.interactively)
}
}
.navigationTitle("OpenClaw Setup")
.navigationTitle(self.isFullScreenStep ? "" : self.step.title)
.navigationBarTitleDisplayMode(.inline)
.toolbar {
if !self.isFullScreenStep {
ToolbarItem(placement: .principal) {
VStack(spacing: 2) {
Text(self.step.title)
.font(.headline)
Text(self.step.manualProgressTitle)
.font(.caption2)
.foregroundStyle(.secondary)
}
}
}
ToolbarItem(placement: .topBarLeading) {
if self.allowSkip {
if self.step.canGoBack {
Button {
self.navigateBack()
} label: {
Label("Back", systemImage: "chevron.left")
}
} else if self.allowSkip {
Button("Close") {
self.onClose()
}
}
}
ToolbarItemGroup(placement: .keyboard) {
Spacer()
Button("Done") {
UIApplication.shared.sendAction(
#selector(UIResponder.resignFirstResponder),
to: nil, from: nil, for: nil)
}
}
}
}
.alert("QR Scanner Unavailable", isPresented: Binding(
get: { self.scannerError != nil },
set: { if !$0 { self.scannerError = nil } }
)) {
Button("OK", role: .cancel) {}
} message: {
Text(self.scannerError ?? "")
}
.sheet(isPresented: self.$showQRScanner) {
NavigationStack {
QRScannerView(
onGatewayLink: { link in
self.handleScannedLink(link)
},
onError: { error in
self.showQRScanner = false
self.scannerError = error
},
onDismiss: {
self.showQRScanner = false
})
.ignoresSafeArea()
.navigationTitle("Scan QR Code")
.navigationBarTitleDisplayMode(.inline)
.toolbar {
ToolbarItem(placement: .topBarLeading) {
Button("Cancel") { self.showQRScanner = false }
}
ToolbarItem(placement: .topBarTrailing) {
PhotosPicker(selection: self.$selectedPhoto, matching: .images) {
Label("Photos", systemImage: "photo")
}
}
}
}
.onChange(of: self.selectedPhoto) { _, newValue in
guard let item = newValue else { return }
self.selectedPhoto = nil
Task {
guard let data = try? await item.loadTransferable(type: Data.self) else {
self.showQRScanner = false
self.scannerError = "Could not load the selected image."
return
}
if let message = self.detectQRCode(from: data) {
if let link = GatewayConnectDeepLink.fromSetupCode(message) {
self.handleScannedLink(link)
return
}
if let url = URL(string: message),
let route = DeepLinkParser.parse(url),
case let .gateway(link) = route
{
self.handleScannedLink(link)
return
}
}
self.showQRScanner = false
self.scannerError = "No valid QR code found in the selected image."
}
}
}
.onAppear {
@ -116,15 +229,49 @@ struct OnboardingWizardView: View {
}
}
@ViewBuilder
private var welcomeStep: some View {
Section("Welcome") {
Text("Connect this iOS node to your OpenClaw gateway.")
Text("Pick your connection mode, connect, then approve pairing if prompted.")
.font(.footnote)
VStack(spacing: 0) {
Spacer()
Image(systemName: "qrcode.viewfinder")
.font(.system(size: 64))
.foregroundStyle(.tint)
.padding(.bottom, 20)
Text("Welcome")
.font(.largeTitle.weight(.bold))
.padding(.bottom, 8)
Text("Connect to your OpenClaw gateway")
.font(.subheadline)
.foregroundStyle(.secondary)
Button("Continue") {
self.step = .mode
.multilineTextAlignment(.center)
.padding(.horizontal, 32)
Spacer()
VStack(spacing: 12) {
Button {
self.showQRScanner = true
} label: {
Label("Scan QR Code", systemImage: "qrcode")
.frame(maxWidth: .infinity)
}
.buttonStyle(.borderedProminent)
.controlSize(.large)
Button {
self.step = .mode
} label: {
Text("Set Up Manually")
.frame(maxWidth: .infinity)
}
.buttonStyle(.bordered)
.controlSize(.large)
}
.padding(.horizontal, 24)
.padding(.bottom, 48)
}
}
@ -180,11 +327,16 @@ struct OnboardingWizardView: View {
@ViewBuilder
private var connectStep: some View {
if let selectedMode {
Section("Connect") {
Text(selectedMode.title)
.font(.headline)
Section {
LabeledContent("Mode", value: selectedMode.title)
LabeledContent("Discovery", value: self.gatewayController.discoveryStatusText)
LabeledContent("Status", value: self.appModel.gatewayStatusText)
} header: {
Text("Status")
} footer: {
if let connectMessage {
Text(connectMessage)
}
}
switch selectedMode {
@ -203,14 +355,6 @@ struct OnboardingWizardView: View {
}
}
}
if let connectMessage {
Section {
Text(connectMessage)
.font(.footnote)
.foregroundStyle(.secondary)
}
}
}
private var homeNetworkConnectSection: some View {
@ -261,48 +405,36 @@ struct OnboardingWizardView: View {
}
private var remoteDomainConnectSection: some View {
Group {
self.manualConnectionFieldsSection(title: "Domain Settings")
Section("TLS") {
Text("TLS stays enabled by default for internet-facing gateways.")
.font(.footnote)
.foregroundStyle(.secondary)
}
}
self.manualConnectionFieldsSection(title: "Domain Settings")
}
private var developerConnectSection: some View {
Group {
Section("Developer Local") {
TextField("Host", text: self.$manualHost)
.textInputAutocapitalization(.never)
.autocorrectionDisabled()
TextField("Port", value: self.$manualPort, format: .number)
.keyboardType(.numberPad)
Toggle("Use TLS", isOn: self.$manualTLS)
Section {
TextField("Host", text: self.$manualHost)
.textInputAutocapitalization(.never)
.autocorrectionDisabled()
TextField("Port", value: self.$manualPort, format: .number)
.keyboardType(.numberPad)
Toggle("Use TLS", isOn: self.$manualTLS)
Button {
Task { await self.connectManual() }
} label: {
if self.connectingGatewayID == "manual" {
HStack(spacing: 8) {
ProgressView()
.progressViewStyle(.circular)
Text("Connecting…")
}
} else {
Text("Connect")
Button {
Task { await self.connectManual() }
} label: {
if self.connectingGatewayID == "manual" {
HStack(spacing: 8) {
ProgressView()
.progressViewStyle(.circular)
Text("Connecting…")
}
} else {
Text("Connect")
}
.disabled(!self.canConnectManual || self.connectingGatewayID != nil)
}
Section {
Text("Default host is localhost. Use your Mac LAN IP if simulator networking requires it.")
.font(.footnote)
.foregroundStyle(.secondary)
}
.disabled(!self.canConnectManual || self.connectingGatewayID != nil)
} header: {
Text("Developer Local")
} footer: {
Text("Default host is localhost. Use your Mac LAN IP if simulator networking requires it.")
}
}
@ -320,10 +452,7 @@ struct OnboardingWizardView: View {
}
if self.issue.needsPairing {
Section("Pairing Approval") {
Text("On gateway host run:")
.font(.footnote)
.foregroundStyle(.secondary)
Section {
Button("Copy: openclaw devices list") {
UIPasteboard.general.string = "openclaw devices list"
}
@ -337,6 +466,10 @@ struct OnboardingWizardView: View {
UIPasteboard.general.string = "openclaw devices approve <requestId>"
}
}
} header: {
Text("Pairing Approval")
} footer: {
Text("Run these commands on your gateway host to approve this device.")
}
}
@ -357,17 +490,42 @@ struct OnboardingWizardView: View {
}
private var successStep: some View {
Section("Connected") {
VStack(spacing: 0) {
Spacer()
Image(systemName: "checkmark.circle.fill")
.font(.system(size: 64))
.foregroundStyle(.green)
.padding(.bottom, 20)
Text("Connected")
.font(.largeTitle.weight(.bold))
.padding(.bottom, 8)
let server = self.appModel.gatewayServerName ?? "gateway"
Text("Connected to \(server).")
Text(server)
.font(.subheadline)
.foregroundStyle(.secondary)
.padding(.bottom, 4)
if let addr = self.appModel.gatewayRemoteAddress {
Text(addr)
.font(.footnote)
.font(.subheadline)
.foregroundStyle(.secondary)
}
Button("Open OpenClaw") {
Spacer()
Button {
self.onClose()
} label: {
Text("Open OpenClaw")
.frame(maxWidth: .infinity)
}
.buttonStyle(.borderedProminent)
.controlSize(.large)
.padding(.horizontal, 24)
.padding(.bottom, 48)
}
}
@ -401,6 +559,45 @@ struct OnboardingWizardView: View {
}
}
private func handleScannedLink(_ link: GatewayConnectDeepLink) {
self.manualHost = link.host
self.manualPort = link.port
self.manualTLS = link.tls
if let token = link.token {
self.gatewayToken = token
}
if let password = link.password {
self.gatewayPassword = password
}
self.showQRScanner = false
self.connectMessage = "Connecting via QR code…"
self.step = .connect
if self.selectedMode == nil {
self.selectedMode = link.tls ? .remoteDomain : .homeNetwork
}
Task { await self.connectManual() }
}
private func detectQRCode(from data: Data) -> String? {
guard let ciImage = CIImage(data: data) else { return nil }
let detector = CIDetector(
ofType: CIDetectorTypeQRCode, context: nil,
options: [CIDetectorAccuracy: CIDetectorAccuracyHigh])
let features = detector?.features(in: ciImage) ?? []
for feature in features {
if let qr = feature as? CIQRCodeFeature, let message = qr.messageString {
return message
}
}
return nil
}
private func navigateBack() {
guard let target = self.step.previous else { return }
self.connectingGatewayID = nil
self.connectMessage = nil
self.step = target
}
private var canConnectManual: Bool {
let host = self.manualHost.trimmingCharacters(in: .whitespacesAndNewlines)
return !host.isEmpty && self.manualPort > 0 && self.manualPort <= 65535

View File

@ -0,0 +1,65 @@
import OpenClawKit
import SwiftUI
import VisionKit
struct QRScannerView: UIViewControllerRepresentable {
let onGatewayLink: (GatewayConnectDeepLink) -> Void
let onError: (String) -> Void
let onDismiss: () -> Void
func makeUIViewController(context: Context) -> DataScannerViewController {
let scanner = DataScannerViewController(
recognizedDataTypes: [.barcode(symbologies: [.qr])],
isHighlightingEnabled: true)
scanner.delegate = context.coordinator
try? scanner.startScanning()
return scanner
}
func updateUIViewController(_: DataScannerViewController, context _: Context) {}
func makeCoordinator() -> Coordinator {
Coordinator(parent: self)
}
final class Coordinator: NSObject, DataScannerViewControllerDelegate {
let parent: QRScannerView
private var handled = false
init(parent: QRScannerView) {
self.parent = parent
}
func dataScanner(_: DataScannerViewController, didAdd items: [RecognizedItem], allItems _: [RecognizedItem]) {
guard !self.handled else { return }
for item in items {
guard case let .barcode(barcode) = item,
let payload = barcode.payloadStringValue
else { continue }
// Try setup code format first (base64url JSON from /pair qr).
if let link = GatewayConnectDeepLink.fromSetupCode(payload) {
self.handled = true
self.parent.onGatewayLink(link)
return
}
// Fall back to deep link URL format (openclaw://gateway?...).
if let url = URL(string: payload),
let route = DeepLinkParser.parse(url),
case let .gateway(link) = route
{
self.handled = true
self.parent.onGatewayLink(link)
return
}
}
}
func dataScanner(_: DataScannerViewController, didRemove _: [RecognizedItem], allItems _: [RecognizedItem]) {}
func dataScanner(_: DataScannerViewController, becameUnavailableWithError error: DataScannerViewController.ScanningUnavailable) {
self.parent.onError("Camera is not available on this device.")
}
}
}

View File

@ -2,6 +2,56 @@ import Foundation
public enum DeepLinkRoute: Sendable, Equatable {
case agent(AgentDeepLink)
case gateway(GatewayConnectDeepLink)
}
public struct GatewayConnectDeepLink: Codable, Sendable, Equatable {
public let host: String
public let port: Int
public let tls: Bool
public let token: String?
public let password: String?
public init(host: String, port: Int, tls: Bool, token: String?, password: String?) {
self.host = host
self.port = port
self.tls = tls
self.token = token
self.password = password
}
public var websocketURL: URL? {
let scheme = self.tls ? "wss" : "ws"
return URL(string: "\(scheme)://\(self.host):\(self.port)")
}
/// Parse a device-pair setup code (base64url-encoded JSON: `{url, token?, password?}`).
public static func fromSetupCode(_ code: String) -> GatewayConnectDeepLink? {
guard let data = Self.decodeBase64Url(code) else { return nil }
guard let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else { return nil }
guard let urlString = json["url"] as? String,
let parsed = URLComponents(string: urlString),
let hostname = parsed.host, !hostname.isEmpty
else { return nil }
let scheme = parsed.scheme ?? "ws"
let tls = scheme == "wss"
let port = parsed.port ?? 18789
let token = json["token"] as? String
let password = json["password"] as? String
return GatewayConnectDeepLink(host: hostname, port: port, tls: tls, token: token, password: password)
}
private static func decodeBase64Url(_ input: String) -> Data? {
var base64 = input
.replacingOccurrences(of: "-", with: "+")
.replacingOccurrences(of: "_", with: "/")
let remainder = base64.count % 4
if remainder > 0 {
base64.append(contentsOf: String(repeating: "=", count: 4 - remainder))
}
return Data(base64Encoded: base64)
}
}
public struct AgentDeepLink: Codable, Sendable, Equatable {
@ -69,6 +119,23 @@ public enum DeepLinkParser {
channel: query["channel"],
timeoutSeconds: timeoutSeconds,
key: query["key"]))
case "gateway":
guard let hostParam = query["host"],
!hostParam.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
else {
return nil
}
let port = query["port"].flatMap { Int($0) } ?? 18789
let tls = (query["tls"] as NSString?)?.boolValue ?? false
return .gateway(
.init(
host: hostParam,
port: port,
tls: tls,
token: query["token"],
password: query["password"]))
default:
return nil
}

View File

@ -1,6 +1,15 @@
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
import os from "node:os";
import { approveDevicePairing, listDevicePairing } from "openclaw/plugin-sdk";
import { approveDevicePairing, listDevicePairing, renderQrPngBase64 } from "openclaw/plugin-sdk";
import qrcode from "qrcode-terminal";
function renderQrAscii(data: string): Promise<string> {
return new Promise((resolve) => {
qrcode.generate(data, { small: true }, (output: string) => {
resolve(output);
});
});
}
const DEFAULT_GATEWAY_PORT = 18789;
@ -449,6 +458,84 @@ export default function register(api: OpenClawPluginApi) {
password: auth.password,
};
if (action === "qr") {
const setupCode = encodeSetupCode(payload);
const [qrBase64, qrAscii] = await Promise.all([
renderQrPngBase64(setupCode),
renderQrAscii(setupCode),
]);
const authLabel = auth.label ?? "auth";
const dataUrl = `data:image/png;base64,${qrBase64}`;
const channel = ctx.channel;
const target = ctx.senderId?.trim() || ctx.from?.trim() || ctx.to?.trim() || "";
if (channel === "telegram" && target) {
try {
const send = api.runtime?.channel?.telegram?.sendMessageTelegram;
if (send) {
await send(target, "Scan this QR code with the OpenClaw iOS app:", {
...(ctx.messageThreadId != null ? { messageThreadId: ctx.messageThreadId } : {}),
...(ctx.accountId ? { accountId: ctx.accountId } : {}),
mediaUrl: dataUrl,
});
return {
text: [
`Gateway: ${payload.url}`,
`Auth: ${authLabel}`,
"",
"After scanning, come back here and run `/pair approve` to complete pairing.",
].join("\n"),
};
}
} catch (err) {
api.logger.warn?.(
`device-pair: telegram QR send failed, falling back (${String(
(err as Error)?.message ?? err,
)})`,
);
}
}
// Render based on channel capability
api.logger.info?.(`device-pair: QR fallback channel=${channel} target=${target}`);
const infoLines = [
`Gateway: ${payload.url}`,
`Auth: ${authLabel}`,
"",
"After scanning, run `/pair approve` to complete pairing.",
];
// TUI (gateway-client) needs ASCII, WebUI can render markdown images
const isTui = target === "gateway-client" || channel !== "webchat";
if (!isTui) {
// WebUI: markdown image only
return {
text: [
"Scan this QR code with the OpenClaw iOS app:",
"",
`![Pairing QR](${dataUrl})`,
"",
...infoLines,
].join("\n"),
};
}
// CLI/TUI: ASCII QR only
return {
text: [
"Scan this QR code with the OpenClaw iOS app:",
"",
"```",
qrAscii,
"```",
"",
...infoLines,
].join("\n"),
};
}
const channel = ctx.channel;
const target = ctx.senderId?.trim() || ctx.from?.trim() || ctx.to?.trim() || "";
const authLabel = auth.label ?? "auth";

View File

@ -377,3 +377,6 @@ export type { ProcessedLineMessage } from "../line/markdown-to-line.js";
// Media utilities
export { loadWebMedia, type WebMediaResult } from "../web/media.js";
// QR code utilities
export { renderQrPngBase64 } from "../web/qr-image.js";

View File

@ -201,6 +201,19 @@ async function loadWebMediaInternal(
};
};
// Handle data: URLs (base64-encoded inline data)
if (mediaUrl.startsWith("data:")) {
const match = mediaUrl.match(/^data:([^;,]+)?(?:;base64)?,(.*)$/);
if (!match) {
throw new Error("Invalid data: URL format");
}
const contentType = match[1] || "application/octet-stream";
const base64Data = match[2];
const buffer = Buffer.from(base64Data, "base64");
const kind = mediaKindFromMime(contentType);
return await clampAndFinalize({ buffer, contentType, kind });
}
if (/^https?:\/\//i.test(mediaUrl)) {
// Enforce a download cap during fetch to avoid unbounded memory usage.
// For optimized images, allow fetching larger payloads before compression.