mirror of
https://fastgit.cc/github.com/openclaw/openclaw
synced 2026-04-20 21:02:10 +08:00
feat: add macOS screen snapshots for monitor preview (#67954) thanks @BunsDev
Co-authored-by: Val Alexander <68980965+BunsDev@users.noreply.github.com>
This commit is contained in:
@@ -4,6 +4,10 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Changes
|
||||
|
||||
- macOS/gateway: add `screen.snapshot` support for macOS app nodes, including runtime plumbing, default macOS allowlisting, and docs for monitor preview flows. (#67954) Thanks @BunsDev.
|
||||
|
||||
### Fixes
|
||||
|
||||
- Onboarding/non-interactive: preserve existing gateway auth tokens during re-onboard so active local gateway clients are not disconnected by an implicit token rotation. (#67821) Thanks @BKF-Gitty.
|
||||
|
||||
@@ -146,6 +146,7 @@ final class MacNodeModeCoordinator {
|
||||
OpenClawCanvasA2UICommand.push.rawValue,
|
||||
OpenClawCanvasA2UICommand.pushJSONL.rawValue,
|
||||
OpenClawCanvasA2UICommand.reset.rawValue,
|
||||
MacNodeScreenCommand.snapshot.rawValue,
|
||||
MacNodeScreenCommand.record.rawValue,
|
||||
OpenClawSystemCommand.notify.rawValue,
|
||||
OpenClawSystemCommand.which.rawValue,
|
||||
|
||||
@@ -63,6 +63,8 @@ actor MacNodeRuntime {
|
||||
return try await self.handleCameraInvoke(req)
|
||||
case OpenClawLocationCommand.get.rawValue:
|
||||
return try await self.handleLocationInvoke(req)
|
||||
case MacNodeScreenCommand.snapshot.rawValue:
|
||||
return try await self.handleScreenSnapshotInvoke(req)
|
||||
case MacNodeScreenCommand.record.rawValue:
|
||||
return try await self.handleScreenRecordInvoke(req)
|
||||
case OpenClawSystemCommand.run.rawValue:
|
||||
@@ -352,6 +354,34 @@ actor MacNodeRuntime {
|
||||
return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payload)
|
||||
}
|
||||
|
||||
/// Handles a `screen.snapshot` invoke by capturing one still frame through the main-actor services.
///
/// Parameters that fail to decode fall back to an all-`nil` `MacNodeScreenSnapshotParams`, leaving
/// every choice to the capture service. `capturedAtMs` is sampled immediately before the capture is
/// requested, and `screenIndex` in the payload echoes the requested value (possibly `nil`).
///
/// - Parameter req: The bridge request; `paramsJSON` is decoded as `MacNodeScreenSnapshotParams`.
/// - Returns: A successful response whose JSON payload carries the base64 image and its metadata.
/// - Throws: Any error raised by `snapshotScreen` or by encoding the payload.
private func handleScreenSnapshotInvoke(_ req: BridgeInvokeRequest) async throws -> BridgeInvokeResponse {
    // Wire shape of the response payload.
    struct ScreenSnapshotPayload: Encodable {
        var format: String
        var base64: String
        var width: Int
        var height: Int
        var screenIndex: Int?
        var capturedAtMs: Int64
    }

    let decoded = try? Self.decodeParams(MacNodeScreenSnapshotParams.self, from: req.paramsJSON)
    let request = decoded ?? MacNodeScreenSnapshotParams()
    let services = await self.mainActorServices()

    // Timestamp the request before the capture starts, in milliseconds since the Unix epoch.
    let capturedAtMs = Int64(Date().timeIntervalSince1970 * 1000)
    let snapshot = try await services.snapshotScreen(
        screenIndex: request.screenIndex,
        maxWidth: request.maxWidth,
        quality: request.quality,
        format: request.format)

    let body = ScreenSnapshotPayload(
        format: snapshot.format.rawValue,
        base64: snapshot.data.base64EncodedString(),
        width: snapshot.width,
        height: snapshot.height,
        screenIndex: request.screenIndex,
        capturedAtMs: capturedAtMs)
    let payloadJSON = try Self.encodePayload(body)
    return BridgeInvokeResponse(id: req.id, ok: true, payloadJSON: payloadJSON)
}
|
||||
|
||||
private func mainActorServices() async -> any MacNodeRuntimeMainActorServices {
|
||||
if let cachedMainActorServices { return cachedMainActorServices }
|
||||
let services = await self.makeMainActorServices()
|
||||
|
||||
@@ -4,6 +4,13 @@ import OpenClawKit
|
||||
|
||||
@MainActor
|
||||
protocol MacNodeRuntimeMainActorServices: Sendable {
|
||||
func snapshotScreen(
|
||||
screenIndex: Int?,
|
||||
maxWidth: Int?,
|
||||
quality: Double?,
|
||||
format: OpenClawScreenSnapshotFormat?) async throws
|
||||
-> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
|
||||
|
||||
func recordScreen(
|
||||
screenIndex: Int?,
|
||||
durationMs: Int?,
|
||||
@@ -21,9 +28,24 @@ protocol MacNodeRuntimeMainActorServices: Sendable {
|
||||
|
||||
@MainActor
|
||||
final class LiveMacNodeRuntimeMainActorServices: MacNodeRuntimeMainActorServices, @unchecked Sendable {
|
||||
private let screenSnapshotter = ScreenSnapshotService()
|
||||
private let screenRecorder = ScreenRecordService()
|
||||
private let locationService = MacNodeLocationService()
|
||||
|
||||
/// Captures a still image of a display by forwarding to this object's `ScreenSnapshotService`.
///
/// All arguments are passed through unchanged, and the service's result is returned as-is.
///
/// - Throws: Whatever `ScreenSnapshotService.snapshot` throws.
func snapshotScreen(
    screenIndex: Int?,
    maxWidth: Int?,
    quality: Double?,
    format: OpenClawScreenSnapshotFormat?) async throws
    -> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
{
    let snapshot = try await self.screenSnapshotter.snapshot(
        screenIndex: screenIndex,
        maxWidth: maxWidth,
        quality: quality,
        format: format)
    return snapshot
}
|
||||
|
||||
func recordScreen(
|
||||
screenIndex: Int?,
|
||||
durationMs: Int?,
|
||||
|
||||
@@ -1,9 +1,18 @@
|
||||
import Foundation
|
||||
import OpenClawKit
|
||||
|
||||
/// Raw command identifiers for the screen commands referenced by the macOS node.
enum MacNodeScreenCommand: String, Codable {
    /// Capture a single still image of a display.
    case snapshot = "screen.snapshot"

    /// Record the screen.
    case record = "screen.record"
}
|
||||
|
||||
/// Optional request parameters for a `screen.snapshot` invoke.
///
/// Every field may be omitted; a `nil` value is forwarded as-is so the capture service can
/// apply its own default.
struct MacNodeScreenSnapshotParams: Codable, Equatable {
    /// Requested display index.
    var screenIndex: Int?

    /// Requested upper bound on the output image width.
    var maxWidth: Int?

    /// Requested encoding quality.
    var quality: Double?

    /// Requested image encoding.
    var format: OpenClawScreenSnapshotFormat?
}
|
||||
|
||||
struct MacNodeScreenRecordParams: Codable, Equatable {
|
||||
var screenIndex: Int?
|
||||
var durationMs: Int?
|
||||
|
||||
109
apps/macos/Sources/OpenClaw/ScreenSnapshotService.swift
Normal file
109
apps/macos/Sources/OpenClaw/ScreenSnapshotService.swift
Normal file
@@ -0,0 +1,109 @@
|
||||
import AppKit
|
||||
import Foundation
|
||||
import OpenClawKit
|
||||
@preconcurrency import ScreenCaptureKit
|
||||
|
||||
/// Captures a still image of one display with ScreenCaptureKit and encodes it as JPEG or PNG.
@MainActor
final class ScreenSnapshotService {
    /// Failures surfaced by `snapshot(screenIndex:maxWidth:quality:format:)`.
    enum ScreenSnapshotError: LocalizedError {
        /// The shareable content reported no displays.
        case noDisplays
        /// The requested index is outside the list of displays.
        case invalidScreenIndex(Int)
        /// ScreenCaptureKit failed to produce an image; carries the underlying description.
        case captureFailed(String)
        /// The captured image could not be encoded; carries a short reason.
        case encodeFailed(String)

        var errorDescription: String? {
            switch self {
            case .noDisplays:
                return "No displays available for screen snapshot"
            case .invalidScreenIndex(let index):
                return "Invalid screen index \(index)"
            case .captureFailed(let reason), .encodeFailed(let reason):
                return reason
            }
        }
    }

    /// Captures one frame of a display and returns it encoded.
    ///
    /// Displays are ordered by ascending `displayID`; `screenIndex` indexes into that ordering and
    /// defaults to `0`. The capture includes the cursor and is scaled down, preserving aspect
    /// ratio, when the display is wider than the resolved maximum width.
    ///
    /// - Parameters:
    ///   - screenIndex: Index into the sorted display list; `nil` means `0`.
    ///   - maxWidth: Upper bound on output width; `nil` or non-positive selects a per-format default.
    ///   - quality: JPEG compression factor, clamped to `0.05...1.0`; `nil` means `0.72`.
    ///   - format: Output encoding; `nil` means `.jpeg`.
    /// - Returns: The encoded bytes, the format used, and the captured image's width and height.
    /// - Throws: `ScreenSnapshotError` for missing displays, a bad index, capture or encode
    ///   failures, or any error raised while querying shareable content.
    func snapshot(
        screenIndex: Int?,
        maxWidth: Int?,
        quality: Double?,
        format: OpenClawScreenSnapshotFormat?) async throws
        -> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
    {
        let outputFormat = format ?? .jpeg
        let limits = Self.normalize(maxWidth: maxWidth, quality: quality, format: outputFormat)

        let shareable = try await SCShareableContent.current
        let orderedDisplays = shareable.displays.sorted { $0.displayID < $1.displayID }
        if orderedDisplays.isEmpty {
            throw ScreenSnapshotError.noDisplays
        }

        let requestedIndex = screenIndex ?? 0
        guard orderedDisplays.indices.contains(requestedIndex) else {
            throw ScreenSnapshotError.invalidScreenIndex(requestedIndex)
        }
        let display = orderedDisplays[requestedIndex]

        let outputSize = Self.targetSize(
            width: display.width,
            height: display.height,
            maxWidth: limits.maxWidth)
        let configuration = SCStreamConfiguration()
        configuration.width = outputSize.width
        configuration.height = outputSize.height
        configuration.showsCursor = true
        let filter = SCContentFilter(display: display, excludingWindows: [])

        let frame: CGImage
        do {
            frame = try await SCScreenshotManager.captureImage(
                contentFilter: filter,
                configuration: configuration)
        } catch {
            // Re-wrap so callers see a ScreenSnapshotError with the underlying description.
            throw ScreenSnapshotError.captureFailed(error.localizedDescription)
        }

        let encoded = try Self.encode(frame, as: outputFormat, quality: limits.quality)
        return (data: encoded, format: outputFormat, width: frame.width, height: frame.height)
    }

    /// Encodes `image` in the requested format; `quality` is only used for JPEG.
    private static func encode(
        _ image: CGImage,
        as format: OpenClawScreenSnapshotFormat,
        quality: Double) throws -> Data
    {
        let bitmap = NSBitmapImageRep(cgImage: image)
        switch format {
        case .png:
            guard let png = bitmap.representation(using: .png, properties: [:]) else {
                throw ScreenSnapshotError.encodeFailed("png encode failed")
            }
            return png
        case .jpeg:
            let properties: [NSBitmapImageRep.PropertyKey: Any] = [.compressionFactor: quality]
            guard let jpeg = bitmap.representation(using: .jpeg, properties: properties) else {
                throw ScreenSnapshotError.encodeFailed("jpeg encode failed")
            }
            return jpeg
        }
    }

    /// Resolves the caller's optional limits into concrete values.
    ///
    /// A missing or non-positive `maxWidth` becomes 900 for PNG and 1600 otherwise; `quality`
    /// defaults to 0.72 and is clamped to `0.05...1.0`.
    private static func normalize(
        maxWidth: Int?,
        quality: Double?,
        format: OpenClawScreenSnapshotFormat)
        -> (maxWidth: Int, quality: Double)
    {
        let fallbackWidth = format == .png ? 900 : 1600
        let widthLimit: Int
        if let maxWidth, maxWidth > 0 {
            widthLimit = maxWidth
        } else {
            widthLimit = fallbackWidth
        }

        let requestedQuality = quality ?? 0.72
        let clampedQuality = min(1.0, max(0.05, requestedQuality))
        return (maxWidth: widthLimit, quality: clampedQuality)
    }

    /// Returns the capture size: the input size unless it is wider than `maxWidth`, in which case
    /// the width becomes `maxWidth` and the height is scaled proportionally (at least 1).
    private static func targetSize(width: Int, height: Int, maxWidth: Int) -> (width: Int, height: Int) {
        let needsDownscale = width > 0 && height > 0 && width > maxWidth
        if !needsDownscale {
            return (width: width, height: height)
        }
        let scale = Double(maxWidth) / Double(width)
        let scaledHeight = (Double(height) * scale).rounded()
        return (width: maxWidth, height: max(1, Int(scaledHeight)))
    }
}
|
||||
@@ -78,6 +78,19 @@ struct MacNodeRuntimeTests {
|
||||
@Test func `handle invoke screen record uses injected services`() async throws {
|
||||
@MainActor
|
||||
final class FakeMainActorServices: MacNodeRuntimeMainActorServices, @unchecked Sendable {
|
||||
func snapshotScreen(
|
||||
screenIndex: Int?,
|
||||
maxWidth: Int?,
|
||||
quality: Double?,
|
||||
format: OpenClawScreenSnapshotFormat?) async throws
|
||||
-> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
|
||||
{
|
||||
_ = screenIndex
|
||||
_ = maxWidth
|
||||
_ = quality
|
||||
return (Data("snapshot".utf8), format ?? .jpeg, 640, 360)
|
||||
}
|
||||
|
||||
func recordScreen(
|
||||
screenIndex: Int?,
|
||||
durationMs: Int?,
|
||||
@@ -127,6 +140,94 @@ struct MacNodeRuntimeTests {
|
||||
#expect(!payload.base64.isEmpty)
|
||||
}
|
||||
|
||||
/// Verifies that a `screen.snapshot` invoke is routed to the injected main-actor services, that the
/// decoded parameters reach the service, and that the response payload mirrors the service result
/// with a `capturedAtMs` taken no later than the service call.
@Test func `handle invoke screen snapshot uses injected services`() async throws {
    @MainActor
    final class FakeMainActorServices: MacNodeRuntimeMainActorServices, @unchecked Sendable {
        /// Wall-clock time (ms since epoch) at which `snapshotScreen` was entered, if it ever was.
        var snapshotCalledAtMs: Int64?

        func snapshotScreen(
            screenIndex: Int?,
            maxWidth: Int?,
            quality: Double?,
            format: OpenClawScreenSnapshotFormat?) async throws
            -> (data: Data, format: OpenClawScreenSnapshotFormat, width: Int, height: Int)
        {
            self.snapshotCalledAtMs = Int64(Date().timeIntervalSince1970 * 1000)
            #expect(screenIndex == 0)
            #expect(maxWidth == 800)
            #expect(quality == 0.5)
            return (Data("ok".utf8), format ?? .jpeg, 800, 450)
        }

        func recordScreen(
            screenIndex: Int?,
            durationMs: Int?,
            fps: Double?,
            includeAudio: Bool?,
            outPath: String?) async throws -> (path: String, hasAudio: Bool)
        {
            let url = FileManager().temporaryDirectory
                .appendingPathComponent("openclaw-test-screen-record-\(UUID().uuidString).mp4")
            try Data("ok".utf8).write(to: url)
            return (path: url.path, hasAudio: false)
        }

        func locationAuthorizationStatus() -> CLAuthorizationStatus {
            .authorizedAlways
        }

        func locationAccuracyAuthorization() -> CLAccuracyAuthorization {
            .fullAccuracy
        }

        func currentLocation(
            desiredAccuracy: OpenClawLocationAccuracy,
            maxAgeMs: Int?,
            timeoutMs: Int?) async throws -> CLLocation
        {
            _ = desiredAccuracy
            _ = maxAgeMs
            _ = timeoutMs
            return CLLocation(latitude: 0, longitude: 0)
        }
    }

    let services = await MainActor.run { FakeMainActorServices() }
    let runtime = MacNodeRuntime(makeMainActorServices: { services })

    let params = MacNodeScreenSnapshotParams(
        screenIndex: 0,
        maxWidth: 800,
        quality: 0.5,
        format: .jpeg)
    let json = try String(data: JSONEncoder().encode(params), encoding: .utf8)
    let response = await runtime.handleInvoke(
        BridgeInvokeRequest(
            id: "req-screen-snapshot",
            command: MacNodeScreenCommand.snapshot.rawValue,
            paramsJSON: json))
    #expect(response.ok == true)
    let payloadJSON = try #require(response.payloadJSON)

    struct Payload: Decodable {
        var format: String
        var base64: String
        var width: Int
        var height: Int
        var capturedAtMs: Int64
    }

    let payload = try JSONDecoder().decode(Payload.self, from: Data(payloadJSON.utf8))
    #expect(payload.format == "jpeg")
    #expect(payload.base64 == Data("ok".utf8).base64EncodedString())
    #expect(payload.width == 800)
    #expect(payload.height == 450)
    #expect(payload.capturedAtMs > 0)

    // `#expect` does not stop the test, so unwrap with `#require`: a fake that was never called
    // then records a failure instead of trapping on a force-unwrap.
    let recordedCallMs = await MainActor.run { services.snapshotCalledAtMs }
    let snapshotCalledAtMs = try #require(recordedCallMs)
    #expect(payload.capturedAtMs <= snapshotCalledAtMs)
}
|
||||
|
||||
@Test func `handle invoke browser proxy uses injected request`() async {
|
||||
let runtime = MacNodeRuntime(browserProxyRequest: { paramsJSON in
|
||||
#expect(paramsJSON?.contains("/tabs") == true)
|
||||
|
||||
@@ -1,9 +1,34 @@
|
||||
import Foundation
|
||||
|
||||
/// Raw command identifiers for screen-related node commands.
public enum OpenClawScreenCommand: String, Codable, Sendable {
    /// Capture a single still image of a display.
    case snapshot = "screen.snapshot"

    /// Record the screen.
    case record = "screen.record"
}
|
||||
|
||||
/// Image encodings a screen snapshot can be delivered in.
///
/// The raw values (`"jpeg"`, `"png"`) are the strings used on the wire.
public enum OpenClawScreenSnapshotFormat: String, Codable, Sendable {
    /// JPEG encoding.
    case jpeg

    /// PNG encoding.
    case png
}
|
||||
|
||||
/// Optional parameters for a `screen.snapshot` request.
///
/// Every field may be left `nil`, in which case the receiving node picks its own default.
public struct OpenClawScreenSnapshotParams: Codable, Sendable, Equatable {
    /// Index of the display to capture.
    public var screenIndex: Int?

    /// Upper bound on the width of the returned image.
    public var maxWidth: Int?

    /// Requested encoding quality.
    public var quality: Double?

    /// Requested image encoding.
    public var format: OpenClawScreenSnapshotFormat?

    /// Creates a parameter set; any argument left out stays `nil`.
    public init(
        screenIndex: Int? = nil,
        maxWidth: Int? = nil,
        quality: Double? = nil,
        format: OpenClawScreenSnapshotFormat? = nil)
    {
        self.screenIndex = screenIndex
        self.maxWidth = maxWidth
        self.quality = quality
        self.format = format
    }
}
|
||||
|
||||
public struct OpenClawScreenRecordParams: Codable, Sendable, Equatable {
|
||||
public var screenIndex: Int?
|
||||
public var durationMs: Int?
|
||||
|
||||
@@ -55,7 +55,7 @@ The macOS app presents itself as a node. Common commands:
|
||||
|
||||
- Canvas: `canvas.present`, `canvas.navigate`, `canvas.eval`, `canvas.snapshot`, `canvas.a2ui.*`
|
||||
- Camera: `camera.snap`, `camera.clip`
|
||||
- Screen: `screen.record`
|
||||
- Screen: `screen.snapshot`, `screen.record`
|
||||
- System: `system.run`, `system.notify`
|
||||
|
||||
The node reports a `permissions` map so agents can decide what’s allowed.
|
||||
|
||||
@@ -367,6 +367,21 @@ describe("resolveNodeCommandAllowlist", () => {
|
||||
expect(DEFAULT_DANGEROUS_NODE_COMMANDS).toContain("sms.search");
|
||||
});
|
||||
|
||||
it("allows macOS screen.snapshot by default but keeps screen.record gated", () => {
  // Resolve the allowlist for a macOS node with an empty gateway config (no overrides).
  const allowlist = resolveNodeCommandAllowlist(
    {},
    {
      platform: "macOS 26.3.1",
      deviceFamily: "Mac",
    },
  );

  // screen.snapshot is not classified as dangerous and is allowed by default.
  expect(DEFAULT_DANGEROUS_NODE_COMMANDS).not.toContain("screen.snapshot");
  expect(allowlist.has("screen.snapshot")).toBe(true);

  // screen.record stays in the dangerous set and is absent from the default allowlist.
  expect(DEFAULT_DANGEROUS_NODE_COMMANDS).toContain("screen.record");
  expect(allowlist.has("screen.record")).toBe(false);
});
|
||||
|
||||
it("can explicitly allow dangerous commands via allowCommands", () => {
|
||||
const allow = resolveNodeCommandAllowlist(
|
||||
{
|
||||
|
||||
@@ -21,6 +21,7 @@ const CANVAS_COMMANDS = [
|
||||
const CAMERA_COMMANDS = ["camera.list"];
|
||||
const CAMERA_DANGEROUS_COMMANDS = ["camera.snap", "camera.clip"];
|
||||
|
||||
const SCREEN_COMMANDS = ["screen.snapshot"];
|
||||
const SCREEN_DANGEROUS_COMMANDS = ["screen.record"];
|
||||
|
||||
const LOCATION_COMMANDS = ["location.get"];
|
||||
@@ -111,6 +112,7 @@ const PLATFORM_DEFAULTS: Record<string, string[]> = {
|
||||
...PHOTOS_COMMANDS,
|
||||
...MOTION_COMMANDS,
|
||||
...SYSTEM_COMMANDS,
|
||||
...SCREEN_COMMANDS,
|
||||
],
|
||||
linux: [...SYSTEM_COMMANDS],
|
||||
windows: [...SYSTEM_COMMANDS],
|
||||
|
||||
@@ -95,6 +95,8 @@ describe("configureGatewayForSetup", () => {
|
||||
|
||||
expect(result.settings.gatewayToken).toBe("generated-token");
|
||||
expect(result.nextConfig.gateway?.nodes?.denyCommands).toEqual(DEFAULT_DANGEROUS_NODE_COMMANDS);
|
||||
expect(result.nextConfig.gateway?.nodes?.denyCommands).not.toContain("screen.snapshot");
|
||||
expect(result.nextConfig.gateway?.nodes?.denyCommands).toContain("screen.record");
|
||||
});
|
||||
|
||||
it("prefers OPENCLAW_GATEWAY_TOKEN during quickstart token setup", async () => {
|
||||
|
||||
Reference in New Issue
Block a user