Some checks failed
Dev Build / deploy-dev (pull_request) Blocked by required conditions
Dev Build / build-test (pull_request) Waiting to run
Build (Dev) / build-go-backend (pull_request) Failing after 0s
Build (Dev) / trigger-deploy (pull_request) Has been skipped
Build (Dev) / build-frontend (pull_request) Failing after 1s
openclaw/grimm-review All 11 findings resolved. Approved.
🔴 readLoop race: replace WriteControl close with ctx-done goroutine that closes conn 🔴 duplicate event handlers: clear handlers map before re-registering on reconnect 🔴 sync.go CurrentTask abuse: add DisplayName field to UpdateAgentRequest, use it 🔴 sync.go newRole dead code: add Role field to UpdateAgentRequest, use it 🔴 events.go handlePresence DB/SSE inconsistency: pass LastActivityAt in update, don't mutate after DB 🔴 events.go handleAgentConfig DB/SSE inconsistency: use DisplayName/Role fields in update 🟠 Send() nil-conn panic: check conn != nil before WriteJSON 🟠 readLoop prompt ctx cancellation: fixed by item #1 🟠 backoff never resets: reset to initialBackoff after successful connectAndRun 🟠 MarkWSReady double-close race: use sync.Once in Client Extra json:"-" dead fields: removed from sessionChangedPayload, presencePayload, agentConfigPayload UpdateAgentRequest: added DisplayName, Role, LastActivityAt fields
460 lines
13 KiB
Go
460 lines
13 KiB
Go
// Package gateway provides WebSocket client integration with the OpenClaw
// gateway using WS protocol v3. The WSClient handles connection, handshake,
// frame routing, request/response correlation, and automatic reconnection
// with exponential backoff.
|
|
package gateway
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log/slog"
|
|
"sync"
|
|
"time"
|
|
|
|
"code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/handler"
|
|
"code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/repository"
|
|
|
|
"github.com/gorilla/websocket"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// WSConfig carries the settings the WebSocket client needs to reach the
// OpenClaw gateway. Values are typically sourced from environment variables.
// AuthToken has to hold a valid OpenClaw gateway operator token.
type WSConfig struct {
	// URL is the gateway endpoint, e.g. "ws://host.docker.internal:18789/".
	URL string
	// AuthToken is the operator token, taken from OPENCLAW_GATEWAY_TOKEN.
	AuthToken string
}
|
|
|
|
// DefaultWSConfig returns sensible defaults for local development.
|
|
func DefaultWSConfig() WSConfig {
|
|
return WSConfig{
|
|
URL: "ws://localhost:18789/",
|
|
AuthToken: "",
|
|
}
|
|
}
|
|
|
|
// eventHandler is a callback invoked when a named event arrives from the
|
|
// gateway.
|
|
type eventHandler func(json.RawMessage)
|
|
|
|
// WSClient connects to the OpenClaw gateway over WebSocket, completes the
|
|
// v3 handshake, routes incoming frames, and automatically reconnects on
|
|
// disconnect with exponential backoff.
|
|
type WSClient struct {
|
|
config WSConfig
|
|
conn *websocket.Conn
|
|
connMu sync.Mutex // protects conn for writes
|
|
pending map[string]chan<- json.RawMessage
|
|
mu sync.Mutex // protects pending and handlers
|
|
agents repository.AgentRepo
|
|
broker *handler.Broker
|
|
logger *slog.Logger
|
|
|
|
handlers map[string][]eventHandler
|
|
connId string // set after successful hello-ok
|
|
restClient *Client // optional REST client to notify on WS ready
|
|
wsReadyOnce sync.Once // ensures MarkWSReady close is one-shot
|
|
}
|
|
|
|
// NewWSClient returns a WSClient wired to the given repository and broker.
|
|
func NewWSClient(cfg WSConfig, agents repository.AgentRepo, broker *handler.Broker, logger *slog.Logger) *WSClient {
|
|
if logger == nil {
|
|
logger = slog.Default()
|
|
}
|
|
return &WSClient{
|
|
config: cfg,
|
|
pending: make(map[string]chan<- json.RawMessage),
|
|
agents: agents,
|
|
broker: broker,
|
|
logger: logger,
|
|
handlers: make(map[string][]eventHandler),
|
|
}
|
|
}
|
|
|
|
// SetRESTClient wires the REST fallback client so the WS client can notify
|
|
// it when the WS connection is ready. Call this before Start.
|
|
func (c *WSClient) SetRESTClient(rest *Client) {
|
|
c.restClient = rest
|
|
}
|
|
|
|
// OnEvent registers a handler for the given event name. Handlers are called
|
|
// when an incoming frame with type "event" and matching event name is
|
|
// received. This is safe to call before Start.
|
|
func (c *WSClient) OnEvent(event string, handler func(json.RawMessage)) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
c.handlers[event] = append(c.handlers[event], handler)
|
|
}
|
|
|
|
// ── Frame types ──────────────────────────────────────────────────────────
|
|
|
|
// wsFrame represents a generic WebSocket frame in the OpenClaw v3 protocol.
|
|
type wsFrame struct {
|
|
Type string `json:"type"` // "req", "res", "event"
|
|
ID string `json:"id,omitempty"` // request/response correlation
|
|
Method string `json:"method,omitempty"` // method name (req frames)
|
|
Event string `json:"event,omitempty"` // event name (event frames)
|
|
Params json.RawMessage `json:"params,omitempty"`
|
|
Result json.RawMessage `json:"result,omitempty"`
|
|
Error *wsError `json:"error,omitempty"`
|
|
}
|
|
|
|
// wsError is the error object a response frame may carry: a numeric code
// plus a human-readable message.
type wsError struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}
|
|
|
|
// connectRequest builds the initial connect handshake payload.
|
|
type connectRequest struct {
|
|
MinProtocol int `json:"minProtocol"`
|
|
MaxProtocol int `json:"maxProtocol"`
|
|
Client connectClientInfo `json:"client"`
|
|
Role string `json:"role"`
|
|
Scopes []string `json:"scopes"`
|
|
Auth connectAuth `json:"auth"`
|
|
}
|
|
|
|
// connectClientInfo describes this client inside the connect request.
type connectClientInfo struct {
	ID       string `json:"id"`
	Version  string `json:"version"`
	Platform string `json:"platform"`
	Mode     string `json:"mode"`
}
|
|
|
|
// connectAuth holds the credentials sent inside the connect request.
type connectAuth struct {
	Token string `json:"token"`
}
|
|
|
|
// helloOKResponse is the result expected in reply to a successful connect
// request: the connection ID plus the method and event names listed under
// "features".
type helloOKResponse struct {
	ConnID   string `json:"connId"`
	Features struct {
		Methods []string `json:"methods"`
		Events  []string `json:"events"`
	} `json:"features"`
}
|
|
|
|
// ── Start loop ───────────────────────────────────────────────────────────
|
|
|
|
// Start connects to the gateway, completes the handshake, and begins the
|
|
// read loop. On disconnect it reconnects with exponential backoff. On
|
|
// ctx cancellation it performs a clean shutdown.
|
|
func (c *WSClient) Start(ctx context.Context) {
|
|
initialBackoff := 1 * time.Second
|
|
maxBackoff := 30 * time.Second
|
|
backoff := initialBackoff
|
|
|
|
for {
|
|
err := c.connectAndRun(ctx)
|
|
if err != nil {
|
|
if ctx.Err() != nil {
|
|
c.logger.Info("ws client stopped (context cancelled)")
|
|
return
|
|
}
|
|
c.logger.Warn("ws client disconnected, reconnecting",
|
|
"error", err,
|
|
"backoff", backoff)
|
|
} else {
|
|
// Reset backoff on successful connect+run completion
|
|
backoff = initialBackoff
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
c.logger.Info("ws client stopped during backoff (context cancelled)")
|
|
return
|
|
case <-time.After(backoff):
|
|
// Exponential backoff: 1s, 2s, 4s, 8s, 16s, max 30s
|
|
backoff = backoff * 2
|
|
if backoff > maxBackoff {
|
|
backoff = maxBackoff
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// connectAndRun dials the gateway, completes the handshake, and runs the
|
|
// read loop until an error occurs or ctx is cancelled.
|
|
func (c *WSClient) connectAndRun(ctx context.Context) error {
|
|
c.logger.Info("ws client connecting", "url", c.config.URL)
|
|
|
|
dialer := websocket.Dialer{
|
|
HandshakeTimeout: 10 * time.Second,
|
|
}
|
|
|
|
conn, _, err := dialer.DialContext(ctx, c.config.URL, nil)
|
|
if err != nil {
|
|
return fmt.Errorf("dial failed: %w", err)
|
|
}
|
|
|
|
c.connMu.Lock()
|
|
c.conn = conn
|
|
c.connMu.Unlock()
|
|
|
|
// When context is cancelled, close the conn to unblock ReadJSON in readLoop.
|
|
go func() {
|
|
<-ctx.Done()
|
|
c.connMu.Lock()
|
|
if c.conn != nil {
|
|
c.conn.Close()
|
|
}
|
|
c.connMu.Unlock()
|
|
}()
|
|
|
|
defer func() {
|
|
conn.Close()
|
|
}()
|
|
|
|
// Step 1: Read the connect.challenge frame
|
|
if err := c.readChallenge(conn); err != nil {
|
|
return fmt.Errorf("handshake challenge: %w", err)
|
|
}
|
|
|
|
// Step 2: Send connect request
|
|
helloOK, err := c.sendConnect(conn)
|
|
if err != nil {
|
|
return fmt.Errorf("handshake connect: %w", err)
|
|
}
|
|
|
|
c.logger.Info("ws client handshake complete",
|
|
"connId", helloOK.ConnID,
|
|
"methods", helloOK.Features.Methods,
|
|
"events", helloOK.Features.Events)
|
|
|
|
// Store connId for reference
|
|
c.connMu.Lock()
|
|
c.connId = helloOK.ConnID
|
|
c.connMu.Unlock()
|
|
|
|
// Notify REST client that WS is live so it stands down
|
|
if c.restClient != nil {
|
|
c.restClient.MarkWSReady()
|
|
c.logger.Info("ws client notified REST fallback to stand down")
|
|
}
|
|
|
|
// Reset wsReadyOnce so MarkWSReady can fire again after a reconnect
|
|
c.wsReadyOnce = sync.Once{}
|
|
|
|
// Step 2b: Initial sync — fetch agents + sessions from gateway
|
|
if err := c.initialSync(ctx); err != nil {
|
|
c.logger.Warn("initial sync failed, will continue with read loop", "error", err)
|
|
}
|
|
|
|
// Step 2c: Register live event handlers
|
|
c.registerEventHandlers()
|
|
|
|
// Step 3: Read loop
|
|
return c.readLoop(ctx, conn)
|
|
}
|
|
|
|
// readChallenge reads the first frame from the gateway, which must be a
|
|
// connect.challenge event.
|
|
func (c *WSClient) readChallenge(conn *websocket.Conn) error {
|
|
var frame wsFrame
|
|
if err := conn.ReadJSON(&frame); err != nil {
|
|
return fmt.Errorf("read challenge: %w", err)
|
|
}
|
|
|
|
if frame.Type != "event" || frame.Event != "connect.challenge" {
|
|
return fmt.Errorf("expected connect.challenge, got type=%s event=%s", frame.Type, frame.Event)
|
|
}
|
|
|
|
c.logger.Debug("received connect.challenge", "params", string(frame.Params))
|
|
return nil
|
|
}
|
|
|
|
// sendConnect sends the connect request and waits for the hello-ok response.
|
|
func (c *WSClient) sendConnect(conn *websocket.Conn) (*helloOKResponse, error) {
|
|
reqID := uuid.New().String()
|
|
params := connectRequest{
|
|
MinProtocol: 3,
|
|
MaxProtocol: 3,
|
|
Client: connectClientInfo{
|
|
ID: "control-center",
|
|
Version: "1.0",
|
|
Platform: "server",
|
|
Mode: "operator",
|
|
},
|
|
Role: "operator",
|
|
Scopes: []string{"operator.read"},
|
|
Auth: connectAuth{
|
|
Token: c.config.AuthToken,
|
|
},
|
|
}
|
|
|
|
paramsJSON, err := json.Marshal(params)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("marshal connect params: %w", err)
|
|
}
|
|
|
|
reqFrame := wsFrame{
|
|
Type: "req",
|
|
ID: reqID,
|
|
Method: "connect",
|
|
Params: paramsJSON,
|
|
}
|
|
|
|
if err := conn.WriteJSON(reqFrame); err != nil {
|
|
return nil, fmt.Errorf("write connect request: %w", err)
|
|
}
|
|
|
|
// Read response
|
|
var resFrame wsFrame
|
|
if err := conn.ReadJSON(&resFrame); err != nil {
|
|
return nil, fmt.Errorf("read connect response: %w", err)
|
|
}
|
|
|
|
if resFrame.Error != nil {
|
|
return nil, fmt.Errorf("connect rejected: code=%d msg=%s", resFrame.Error.Code, resFrame.Error.Message)
|
|
}
|
|
|
|
if resFrame.ID != reqID {
|
|
return nil, fmt.Errorf("response id mismatch: expected %s, got %s", reqID, resFrame.ID)
|
|
}
|
|
|
|
// Check for hello-ok method in the result
|
|
// The gateway responds with method "hello-ok" on success
|
|
var helloOK helloOKResponse
|
|
if err := json.Unmarshal(resFrame.Result, &helloOK); err != nil {
|
|
return nil, fmt.Errorf("parse hello-ok: %w", err)
|
|
}
|
|
|
|
return &helloOK, nil
|
|
}
|
|
|
|
// readLoop continuously reads frames from the connection and routes them.
|
|
// It returns on read error or when the connection is closed by the ctx-done
|
|
// goroutine started in connectAndRun.
|
|
func (c *WSClient) readLoop(ctx context.Context, conn *websocket.Conn) error {
|
|
for {
|
|
var frame wsFrame
|
|
if err := conn.ReadJSON(&frame); err != nil {
|
|
if ctx.Err() != nil {
|
|
return ctx.Err()
|
|
}
|
|
// Check if it's a close error
|
|
if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
|
|
c.logger.Info("ws connection closed by server")
|
|
return nil
|
|
}
|
|
if websocket.IsUnexpectedCloseError(err) {
|
|
c.logger.Warn("ws connection unexpectedly closed", "error", err)
|
|
return err
|
|
}
|
|
return fmt.Errorf("read frame: %w", err)
|
|
}
|
|
|
|
c.routeFrame(frame)
|
|
}
|
|
}
|
|
|
|
// routeFrame dispatches a received frame to the appropriate handler.
|
|
func (c *WSClient) routeFrame(frame wsFrame) {
|
|
switch frame.Type {
|
|
case "res":
|
|
c.handleResponse(frame)
|
|
case "event":
|
|
c.handleEvent(frame)
|
|
default:
|
|
c.logger.Warn("unknown frame type", "type", frame.Type, "id", frame.ID)
|
|
}
|
|
}
|
|
|
|
// handleResponse correlates a response frame to a pending request channel.
|
|
func (c *WSClient) handleResponse(frame wsFrame) {
|
|
c.mu.Lock()
|
|
ch, ok := c.pending[frame.ID]
|
|
if ok {
|
|
delete(c.pending, frame.ID)
|
|
}
|
|
c.mu.Unlock()
|
|
|
|
if !ok {
|
|
c.logger.Warn("received response for unknown request", "id", frame.ID)
|
|
return
|
|
}
|
|
|
|
if frame.Error != nil {
|
|
// Send nil to signal error; caller checks via Send return
|
|
ch <- nil
|
|
return
|
|
}
|
|
|
|
ch <- frame.Result
|
|
}
|
|
|
|
// handleEvent dispatches an event frame to registered handlers.
|
|
func (c *WSClient) handleEvent(frame wsFrame) {
|
|
c.mu.Lock()
|
|
handlers := c.handlers[frame.Event]
|
|
c.mu.Unlock()
|
|
|
|
if len(handlers) == 0 {
|
|
c.logger.Debug("unhandled event", "event", frame.Event)
|
|
return
|
|
}
|
|
|
|
for _, h := range handlers {
|
|
h(frame.Params)
|
|
}
|
|
}
|
|
|
|
// ── Send ─────────────────────────────────────────────────────────────────
|
|
|
|
// Send sends a JSON request to the gateway and returns the response payload.
|
|
// It is safe for concurrent use. Returns an error if the client is not
|
|
// connected.
|
|
func (c *WSClient) Send(method string, params any) (json.RawMessage, error) {
|
|
reqID := uuid.New().String()
|
|
|
|
paramsJSON, err := json.Marshal(params)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("marshal params: %w", err)
|
|
}
|
|
|
|
// Register pending response channel
|
|
respCh := make(chan json.RawMessage, 1)
|
|
c.mu.Lock()
|
|
c.pending[reqID] = respCh
|
|
c.mu.Unlock()
|
|
|
|
defer func() {
|
|
c.mu.Lock()
|
|
delete(c.pending, reqID)
|
|
c.mu.Unlock()
|
|
}()
|
|
|
|
// Build and send frame
|
|
frame := wsFrame{
|
|
Type: "req",
|
|
ID: reqID,
|
|
Method: method,
|
|
Params: paramsJSON,
|
|
}
|
|
|
|
c.connMu.Lock()
|
|
if c.conn == nil {
|
|
c.connMu.Unlock()
|
|
return nil, fmt.Errorf("gateway: not connected")
|
|
}
|
|
err = c.conn.WriteJSON(frame)
|
|
c.connMu.Unlock()
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("write request: %w", err)
|
|
}
|
|
|
|
// Wait for response with timeout
|
|
select {
|
|
case resp := <-respCh:
|
|
if resp == nil {
|
|
return nil, fmt.Errorf("gateway returned error for request %s", reqID)
|
|
}
|
|
return resp, nil
|
|
case <-time.After(30 * time.Second):
|
|
return nil, fmt.Errorf("request %s timed out", reqID)
|
|
}
|
|
} |