CUB-200: implement WebSocket gateway client with v3 protocol
Replace REST poller with WebSocket client as primary gateway connection: - wsclient.go: WebSocket client with v3 handshake (connect.challenge → connect → hello-ok), frame routing (req/res/event), JSON-RPC Send(), auto-reconnect with exponential backoff (1s → 30s max) - sync.go: Initial sync via agents.list + sessions.list RPCs, merge session runtime state into AgentCardData, broadcast fleet.update - events.go: Real-time event handlers for sessions.changed, presence, and agent.config — DB update first, then SSE broadcast - client.go: REST poller retained as fallback (WS is primary) - config.go: Add GATEWAY_WS_URL and OPENCLAW_GATEWAY_TOKEN env vars - main.go: Wire WS client as primary, REST as fallback - .env.example: Document new WS config vars Fallback: If WS connection fails, seeded demo data + REST polling remain available.
This commit is contained in:
443
go-backend/internal/gateway/wsclient.go
Normal file
443
go-backend/internal/gateway/wsclient.go
Normal file
@@ -0,0 +1,443 @@
|
||||
// Package gateway provides WebSocket client integration with the OpenClaw
|
||||
// gateway using WS protocol v3. The WSClient handles connection, handshake,
|
||||
// frame routing, request/response correlation, and automatic reconnection
|
||||
// with exponential backoff (1s → 30s max).
|
||||
package gateway
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/handler"
|
||||
"code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/repository"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/gorilla/websocket"
|
||||
)
|
||||
|
||||
// WSConfig holds WebSocket client configuration, typically loaded from
|
||||
// environment variables. AuthToken must be set to a valid OpenClaw gateway
|
||||
// operator token.
|
||||
type WSConfig struct {
|
||||
URL string // e.g. "ws://host.docker.internal:18789/"
|
||||
AuthToken string // from OPENCLAW_GATEWAY_TOKEN
|
||||
}
|
||||
|
||||
// DefaultWSConfig returns sensible defaults for local development.
|
||||
func DefaultWSConfig() WSConfig {
|
||||
return WSConfig{
|
||||
URL: "ws://localhost:18789/",
|
||||
AuthToken: "",
|
||||
}
|
||||
}
|
||||
|
||||
// eventHandler is a callback invoked when a named event arrives from the
|
||||
// gateway.
|
||||
type eventHandler func(json.RawMessage)
|
||||
|
||||
// WSClient connects to the OpenClaw gateway over WebSocket, completes the
|
||||
// v3 handshake, routes incoming frames, and automatically reconnects on
|
||||
// disconnect with exponential backoff (1s → 30s max).
|
||||
type WSClient struct {
|
||||
config WSConfig
|
||||
conn *websocket.Conn
|
||||
connMu sync.Mutex // protects conn for writes
|
||||
pending map[string]chan<- json.RawMessage
|
||||
mu sync.Mutex // protects pending and handlers
|
||||
agents repository.AgentRepo
|
||||
broker *handler.Broker
|
||||
logger *slog.Logger
|
||||
handlers map[string][]eventHandler
|
||||
connID string // set after successful hello-ok
|
||||
restClient *Client // optional REST client to notify on WS ready
|
||||
}
|
||||
|
||||
// NewWSClient returns a WSClient wired to the given repository and broker.
|
||||
func NewWSClient(cfg WSConfig, agents repository.AgentRepo, broker *handler.Broker, logger *slog.Logger) *WSClient {
|
||||
if logger == nil {
|
||||
logger = slog.Default()
|
||||
}
|
||||
return &WSClient{
|
||||
config: cfg,
|
||||
pending: make(map[string]chan<- json.RawMessage),
|
||||
agents: agents,
|
||||
broker: broker,
|
||||
logger: logger,
|
||||
handlers: make(map[string][]eventHandler),
|
||||
}
|
||||
}
|
||||
|
||||
// SetRESTClient wires the REST fallback client so the WS client can notify
|
||||
// it when the WS connection is ready. Call this before Start.
|
||||
func (c *WSClient) SetRESTClient(rest *Client) {
|
||||
c.restClient = rest
|
||||
}
|
||||
|
||||
// OnEvent registers a handler for the given event name. Handlers are called
|
||||
// when an incoming frame with type "event" and matching event name is
|
||||
// received. Safe to call before Start.
|
||||
func (c *WSClient) OnEvent(event string, handler func(json.RawMessage)) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
c.handlers[event] = append(c.handlers[event], handler)
|
||||
}
|
||||
|
||||
// ── Frame types ──────────────────────────────────────────────────────────
|
||||
|
||||
// wsFrame represents a generic WebSocket frame in the OpenClaw v3 protocol.
|
||||
type wsFrame struct {
|
||||
Type string `json:"type"` // "req", "res", "event"
|
||||
ID string `json:"id,omitempty"` // request/response correlation
|
||||
Method string `json:"method,omitempty"` // method name (req/res frames)
|
||||
Event string `json:"event,omitempty"` // event name (event frames)
|
||||
Params json.RawMessage `json:"params,omitempty"`
|
||||
Result json.RawMessage `json:"result,omitempty"`
|
||||
Error *wsError `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// wsError represents an error in a response frame.
|
||||
type wsError struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
// connectRequest builds the initial connect handshake payload.
|
||||
type connectRequest struct {
|
||||
MinProtocol int `json:"minProtocol"`
|
||||
MaxProtocol int `json:"maxProtocol"`
|
||||
Client connectClientInfo `json:"client"`
|
||||
Role string `json:"role"`
|
||||
Scopes []string `json:"scopes"`
|
||||
Auth connectAuth `json:"auth"`
|
||||
}
|
||||
|
||||
type connectClientInfo struct {
|
||||
ID string `json:"id"`
|
||||
Version string `json:"version"`
|
||||
Platform string `json:"platform"`
|
||||
Mode string `json:"mode"`
|
||||
}
|
||||
|
||||
type connectAuth struct {
|
||||
Token string `json:"token"`
|
||||
}
|
||||
|
||||
// helloOKResponse represents the expected response to a successful connect.
|
||||
type helloOKResponse struct {
|
||||
ConnID string `json:"connId"`
|
||||
Features struct {
|
||||
Methods []string `json:"methods"`
|
||||
Events []string `json:"events"`
|
||||
} `json:"features"`
|
||||
}
|
||||
|
||||
// ── Start loop ───────────────────────────────────────────────────────────
|
||||
|
||||
// Start connects to the gateway, completes the handshake, and begins the
|
||||
// read loop. On disconnect it reconnects with exponential backoff (1s → 30s).
|
||||
// On ctx cancellation it performs a clean shutdown.
|
||||
func (c *WSClient) Start(ctx context.Context) {
|
||||
backoff := 1 * time.Second
|
||||
maxBackoff := 30 * time.Second
|
||||
|
||||
for {
|
||||
err := c.connectAndRun(ctx)
|
||||
if err != nil {
|
||||
if ctx.Err() != nil {
|
||||
c.logger.Info("ws client stopped (context cancelled)")
|
||||
return
|
||||
}
|
||||
c.logger.Warn("ws client disconnected, reconnecting",
|
||||
"error", err,
|
||||
"backoff", backoff)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
c.logger.Info("ws client stopped during backoff (context cancelled)")
|
||||
return
|
||||
case <-time.After(backoff):
|
||||
// Exponential backoff: 1s, 2s, 4s, 8s, 16s, max 30s
|
||||
backoff = backoff * 2
|
||||
if backoff > maxBackoff {
|
||||
backoff = maxBackoff
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// connectAndRun dials the gateway, completes the handshake, and runs the
|
||||
// read loop until an error occurs or ctx is cancelled.
|
||||
func (c *WSClient) connectAndRun(ctx context.Context) error {
|
||||
c.logger.Info("ws client connecting", "url", c.config.URL)
|
||||
|
||||
dialer := websocket.Dialer{
|
||||
HandshakeTimeout: 10 * time.Second,
|
||||
}
|
||||
|
||||
conn, _, err := dialer.DialContext(ctx, c.config.URL, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dial failed: %w", err)
|
||||
}
|
||||
|
||||
c.connMu.Lock()
|
||||
c.conn = conn
|
||||
c.connMu.Unlock()
|
||||
|
||||
defer conn.Close()
|
||||
|
||||
// Step 1: Read the connect.challenge frame
|
||||
if err := c.readChallenge(conn); err != nil {
|
||||
return fmt.Errorf("handshake challenge: %w", err)
|
||||
}
|
||||
|
||||
// Step 2: Send connect request and read hello-ok response
|
||||
helloOK, err := c.sendConnect(conn)
|
||||
if err != nil {
|
||||
return fmt.Errorf("handshake connect: %w", err)
|
||||
}
|
||||
|
||||
c.logger.Info("ws client handshake complete",
|
||||
"connId", helloOK.ConnID,
|
||||
"methods", helloOK.Features.Methods,
|
||||
"events", helloOK.Features.Events)
|
||||
|
||||
c.connMu.Lock()
|
||||
c.connID = helloOK.ConnID
|
||||
c.connMu.Unlock()
|
||||
|
||||
// Notify REST client that WS is live so it stands down
|
||||
if c.restClient != nil {
|
||||
c.restClient.MarkWSReady()
|
||||
c.logger.Info("ws client notified REST fallback to stand down")
|
||||
}
|
||||
|
||||
// Step 3: Initial sync — fetch agents + sessions from gateway
|
||||
if err := c.initialSync(ctx); err != nil {
|
||||
c.logger.Warn("initial sync failed, continuing with read loop", "error", err)
|
||||
}
|
||||
|
||||
// Step 4: Register live event handlers
|
||||
c.registerEventHandlers()
|
||||
|
||||
// Step 5: Read loop — blocks until disconnect or ctx cancel
|
||||
return c.readLoop(ctx, conn)
|
||||
}
|
||||
|
||||
// readChallenge reads the first frame from the gateway, which must be a
|
||||
// connect.challenge event.
|
||||
func (c *WSClient) readChallenge(conn *websocket.Conn) error {
|
||||
var frame wsFrame
|
||||
if err := conn.ReadJSON(&frame); err != nil {
|
||||
return fmt.Errorf("read challenge: %w", err)
|
||||
}
|
||||
|
||||
if frame.Type != "event" || frame.Event != "connect.challenge" {
|
||||
return fmt.Errorf("expected connect.challenge, got type=%s event=%s", frame.Type, frame.Event)
|
||||
}
|
||||
|
||||
c.logger.Debug("received connect.challenge")
|
||||
return nil
|
||||
}
|
||||
|
||||
// sendConnect sends the connect request and waits for the hello-ok response.
|
||||
func (c *WSClient) sendConnect(conn *websocket.Conn) (*helloOKResponse, error) {
|
||||
reqID := uuid.New().String()
|
||||
params := connectRequest{
|
||||
MinProtocol: 3,
|
||||
MaxProtocol: 3,
|
||||
Client: connectClientInfo{
|
||||
ID: "control-center",
|
||||
Version: "1.0",
|
||||
Platform: "server",
|
||||
Mode: "operator",
|
||||
},
|
||||
Role: "operator",
|
||||
Scopes: []string{"operator.read"},
|
||||
Auth: connectAuth{
|
||||
Token: c.config.AuthToken,
|
||||
},
|
||||
}
|
||||
|
||||
paramsJSON, err := json.Marshal(params)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal connect params: %w", err)
|
||||
}
|
||||
|
||||
reqFrame := wsFrame{
|
||||
Type: "req",
|
||||
ID: reqID,
|
||||
Method: "connect",
|
||||
Params: paramsJSON,
|
||||
}
|
||||
|
||||
if err := conn.WriteJSON(reqFrame); err != nil {
|
||||
return nil, fmt.Errorf("write connect request: %w", err)
|
||||
}
|
||||
|
||||
// Read response
|
||||
var resFrame wsFrame
|
||||
if err := conn.ReadJSON(&resFrame); err != nil {
|
||||
return nil, fmt.Errorf("read connect response: %w", err)
|
||||
}
|
||||
|
||||
if resFrame.Error != nil {
|
||||
return nil, fmt.Errorf("connect rejected: code=%d msg=%s", resFrame.Error.Code, resFrame.Error.Message)
|
||||
}
|
||||
|
||||
if resFrame.ID != reqID {
|
||||
return nil, fmt.Errorf("response id mismatch: expected %s, got %s", reqID, resFrame.ID)
|
||||
}
|
||||
|
||||
var helloOK helloOKResponse
|
||||
if err := json.Unmarshal(resFrame.Result, &helloOK); err != nil {
|
||||
return nil, fmt.Errorf("parse hello-ok: %w", err)
|
||||
}
|
||||
|
||||
return &helloOK, nil
|
||||
}
|
||||
|
||||
// readLoop continuously reads frames from the connection and routes them.
|
||||
// It returns on read error or context cancellation.
|
||||
func (c *WSClient) readLoop(ctx context.Context, conn *websocket.Conn) error {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// Clean shutdown: send close frame
|
||||
c.connMu.Lock()
|
||||
c.conn.WriteControl(
|
||||
websocket.CloseMessage,
|
||||
websocket.FormatCloseMessage(websocket.CloseNormalClosure, "shutdown"),
|
||||
time.Now().Add(5*time.Second),
|
||||
)
|
||||
c.connMu.Unlock()
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
var frame wsFrame
|
||||
if err := conn.ReadJSON(&frame); err != nil {
|
||||
if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
|
||||
c.logger.Info("ws connection closed by server")
|
||||
return nil
|
||||
}
|
||||
if websocket.IsUnexpectedCloseError(err) {
|
||||
c.logger.Warn("ws connection unexpectedly closed", "error", err)
|
||||
return err
|
||||
}
|
||||
return fmt.Errorf("read frame: %w", err)
|
||||
}
|
||||
|
||||
c.routeFrame(frame)
|
||||
}
|
||||
}
|
||||
|
||||
// routeFrame dispatches a received frame to the appropriate handler.
|
||||
func (c *WSClient) routeFrame(frame wsFrame) {
|
||||
switch frame.Type {
|
||||
case "res":
|
||||
c.handleResponse(frame)
|
||||
case "event":
|
||||
c.handleEvent(frame)
|
||||
default:
|
||||
c.logger.Debug("unknown frame type", "type", frame.Type, "id", frame.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// handleResponse correlates a response frame to a pending request channel.
|
||||
func (c *WSClient) handleResponse(frame wsFrame) {
|
||||
c.mu.Lock()
|
||||
ch, ok := c.pending[frame.ID]
|
||||
if ok {
|
||||
delete(c.pending, frame.ID)
|
||||
}
|
||||
c.mu.Unlock()
|
||||
|
||||
if !ok {
|
||||
c.logger.Warn("received response for unknown request", "id", frame.ID)
|
||||
return
|
||||
}
|
||||
|
||||
if frame.Error != nil {
|
||||
ch <- nil
|
||||
return
|
||||
}
|
||||
|
||||
ch <- frame.Result
|
||||
}
|
||||
|
||||
// handleEvent dispatches an event frame to registered handlers.
|
||||
func (c *WSClient) handleEvent(frame wsFrame) {
|
||||
c.mu.Lock()
|
||||
handlers := c.handlers[frame.Event]
|
||||
c.mu.Unlock()
|
||||
|
||||
if len(handlers) == 0 {
|
||||
c.logger.Debug("unhandled event", "event", frame.Event)
|
||||
return
|
||||
}
|
||||
|
||||
for _, h := range handlers {
|
||||
h(frame.Params)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Send (RPC) ──────────────────────────────────────────────────────────
|
||||
|
||||
// Send sends a JSON-RPC request to the gateway and returns the response
|
||||
// payload. It is safe for concurrent use.
|
||||
func (c *WSClient) Send(method string, params any) (json.RawMessage, error) {
|
||||
reqID := uuid.New().String()
|
||||
|
||||
var paramsJSON json.RawMessage
|
||||
if params != nil {
|
||||
var err error
|
||||
paramsJSON, err = json.Marshal(params)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshal params: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Register pending response channel
|
||||
respCh := make(chan json.RawMessage, 1)
|
||||
c.mu.Lock()
|
||||
c.pending[reqID] = respCh
|
||||
c.mu.Unlock()
|
||||
|
||||
defer func() {
|
||||
c.mu.Lock()
|
||||
delete(c.pending, reqID)
|
||||
c.mu.Unlock()
|
||||
}()
|
||||
|
||||
// Build and send frame
|
||||
frame := wsFrame{
|
||||
Type: "req",
|
||||
ID: reqID,
|
||||
Method: method,
|
||||
Params: paramsJSON,
|
||||
}
|
||||
|
||||
c.connMu.Lock()
|
||||
err := c.conn.WriteJSON(frame)
|
||||
c.connMu.Unlock()
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("write request: %w", err)
|
||||
}
|
||||
|
||||
// Wait for response with timeout
|
||||
select {
|
||||
case resp := <-respCh:
|
||||
if resp == nil {
|
||||
return nil, fmt.Errorf("gateway returned error for request %s (%s)", reqID, method)
|
||||
}
|
||||
return resp, nil
|
||||
case <-time.After(30 * time.Second):
|
||||
return nil, fmt.Errorf("request %s (%s) timed out", reqID, method)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user