CUB-200: resolve merge conflicts with dev — adopt dev's consolidated workflows and improved Go gateway code
Some checks failed
Dev Build & Deploy / test-and-build (pull_request) Failing after 0s
Dev Build & Deploy / docker-build-push (pull_request) Has been skipped

This commit is contained in:
Dex
2026-05-20 21:26:17 +00:00
30 changed files with 3547 additions and 233 deletions

View File

@@ -1,7 +1,7 @@
// Package gateway provides WebSocket client integration with the OpenClaw
// gateway using WS protocol v3. The WSClient handles connection, handshake,
// frame routing, request/response correlation, and automatic reconnection
// with exponential backoff (1s → 30s max).
// with exponential backoff.
package gateway
import (
@@ -15,8 +15,8 @@ import (
"code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/handler"
"code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/repository"
"github.com/google/uuid"
"github.com/gorilla/websocket"
"github.com/google/uuid"
)
// WSConfig holds WebSocket client configuration, typically loaded from
@@ -41,19 +41,21 @@ type eventHandler func(json.RawMessage)
// WSClient connects to the OpenClaw gateway over WebSocket, completes the
// v3 handshake, routes incoming frames, and automatically reconnects on
// disconnect with exponential backoff (1s → 30s max).
// disconnect with exponential backoff.
type WSClient struct {
config WSConfig
conn *websocket.Conn
connMu sync.Mutex // protects conn for writes
pending map[string]chan<- json.RawMessage
mu sync.Mutex // protects pending and handlers
agents repository.AgentRepo
broker *handler.Broker
logger *slog.Logger
handlers map[string][]eventHandler
connID string // set after successful hello-ok
restClient *Client // optional REST client to notify on WS ready
config WSConfig
conn *websocket.Conn
connMu sync.Mutex // protects conn for writes
pending map[string]chan<- json.RawMessage
mu sync.Mutex // protects pending and handlers
agents repository.AgentRepo
broker *handler.Broker
logger *slog.Logger
handlers map[string][]eventHandler
connId string // set after successful hello-ok
restClient *Client // optional REST client to notify on WS ready
wsReadyOnce sync.Once // ensures MarkWSReady close is one-shot
}
// NewWSClient returns a WSClient wired to the given repository and broker.
@@ -79,7 +81,7 @@ func (c *WSClient) SetRESTClient(rest *Client) {
// OnEvent registers a handler for the given event name. Handlers are called
// when an incoming frame with type "event" and matching event name is
// received. Safe to call before Start.
// received. This is safe to call before Start.
func (c *WSClient) OnEvent(event string, handler func(json.RawMessage)) {
c.mu.Lock()
defer c.mu.Unlock()
@@ -90,10 +92,10 @@ func (c *WSClient) OnEvent(event string, handler func(json.RawMessage)) {
// wsFrame represents a generic WebSocket frame in the OpenClaw v3 protocol.
type wsFrame struct {
Type string `json:"type"` // "req", "res", "event"
ID string `json:"id,omitempty"` // request/response correlation
Method string `json:"method,omitempty"` // method name (req/res frames)
Event string `json:"event,omitempty"` // event name (event frames)
Type string `json:"type"` // "req", "res", "event"
ID string `json:"id,omitempty"` // request/response correlation
Method string `json:"method,omitempty"` // method name (req frames)
Event string `json:"event,omitempty"` // event name (event frames)
Params json.RawMessage `json:"params,omitempty"`
Result json.RawMessage `json:"result,omitempty"`
Error *wsError `json:"error,omitempty"`
@@ -128,7 +130,7 @@ type connectAuth struct {
// helloOKResponse represents the expected response to a successful connect.
type helloOKResponse struct {
ConnID string `json:"connId"`
ConnID string `json:"connId"`
Features struct {
Methods []string `json:"methods"`
Events []string `json:"events"`
@@ -138,11 +140,12 @@ type helloOKResponse struct {
// ── Start loop ───────────────────────────────────────────────────────────
// Start connects to the gateway, completes the handshake, and begins the
// read loop. On disconnect it reconnects with exponential backoff (1s → 30s).
// On ctx cancellation it performs a clean shutdown.
// read loop. On disconnect it reconnects with exponential backoff. On
// ctx cancellation it performs a clean shutdown.
func (c *WSClient) Start(ctx context.Context) {
backoff := 1 * time.Second
initialBackoff := 1 * time.Second
maxBackoff := 30 * time.Second
backoff := initialBackoff
for {
err := c.connectAndRun(ctx)
@@ -154,6 +157,9 @@ func (c *WSClient) Start(ctx context.Context) {
c.logger.Warn("ws client disconnected, reconnecting",
"error", err,
"backoff", backoff)
} else {
// Reset backoff on successful connect+run completion
backoff = initialBackoff
}
select {
@@ -188,14 +194,26 @@ func (c *WSClient) connectAndRun(ctx context.Context) error {
c.conn = conn
c.connMu.Unlock()
defer conn.Close()
// When context is cancelled, close the conn to unblock ReadJSON in readLoop.
go func() {
<-ctx.Done()
c.connMu.Lock()
if c.conn != nil {
c.conn.Close()
}
c.connMu.Unlock()
}()
defer func() {
conn.Close()
}()
// Step 1: Read the connect.challenge frame
if err := c.readChallenge(conn); err != nil {
return fmt.Errorf("handshake challenge: %w", err)
}
// Step 2: Send connect request and read hello-ok response
// Step 2: Send connect request
helloOK, err := c.sendConnect(conn)
if err != nil {
return fmt.Errorf("handshake connect: %w", err)
@@ -206,8 +224,9 @@ func (c *WSClient) connectAndRun(ctx context.Context) error {
"methods", helloOK.Features.Methods,
"events", helloOK.Features.Events)
// Store connId for reference
c.connMu.Lock()
c.connID = helloOK.ConnID
c.connId = helloOK.ConnID
c.connMu.Unlock()
// Notify REST client that WS is live so it stands down
@@ -216,15 +235,18 @@ func (c *WSClient) connectAndRun(ctx context.Context) error {
c.logger.Info("ws client notified REST fallback to stand down")
}
// Step 3: Initial sync — fetch agents + sessions from gateway
// Reset wsReadyOnce so MarkWSReady can fire again after a reconnect
c.wsReadyOnce = sync.Once{}
// Step 2b: Initial sync — fetch agents + sessions from gateway
if err := c.initialSync(ctx); err != nil {
c.logger.Warn("initial sync failed, continuing with read loop", "error", err)
c.logger.Warn("initial sync failed, will continue with read loop", "error", err)
}
// Step 4: Register live event handlers
// Step 2c: Register live event handlers
c.registerEventHandlers()
// Step 5: Read loop — blocks until disconnect or ctx cancel
// Step 3: Read loop
return c.readLoop(ctx, conn)
}
@@ -240,7 +262,7 @@ func (c *WSClient) readChallenge(conn *websocket.Conn) error {
return fmt.Errorf("expected connect.challenge, got type=%s event=%s", frame.Type, frame.Event)
}
c.logger.Debug("received connect.challenge")
c.logger.Debug("received connect.challenge", "params", string(frame.Params))
return nil
}
@@ -293,6 +315,8 @@ func (c *WSClient) sendConnect(conn *websocket.Conn) (*helloOKResponse, error) {
return nil, fmt.Errorf("response id mismatch: expected %s, got %s", reqID, resFrame.ID)
}
// Parse the hello-ok payload from the response result. The gateway is
// expected to answer a successful connect request with hello-ok.
var helloOK helloOKResponse
if err := json.Unmarshal(resFrame.Result, &helloOK); err != nil {
return nil, fmt.Errorf("parse hello-ok: %w", err)
@@ -302,25 +326,16 @@ func (c *WSClient) sendConnect(conn *websocket.Conn) (*helloOKResponse, error) {
}
// readLoop continuously reads frames from the connection and routes them.
// It returns on read error or context cancellation.
// It returns on read error or when the connection is closed by the ctx-done
// goroutine started in connectAndRun.
func (c *WSClient) readLoop(ctx context.Context, conn *websocket.Conn) error {
for {
select {
case <-ctx.Done():
// Clean shutdown: send close frame
c.connMu.Lock()
c.conn.WriteControl(
websocket.CloseMessage,
websocket.FormatCloseMessage(websocket.CloseNormalClosure, "shutdown"),
time.Now().Add(5*time.Second),
)
c.connMu.Unlock()
return ctx.Err()
default:
}
var frame wsFrame
if err := conn.ReadJSON(&frame); err != nil {
if ctx.Err() != nil {
return ctx.Err()
}
// Check if it's a close error
if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
c.logger.Info("ws connection closed by server")
return nil
@@ -344,7 +359,7 @@ func (c *WSClient) routeFrame(frame wsFrame) {
case "event":
c.handleEvent(frame)
default:
c.logger.Debug("unknown frame type", "type", frame.Type, "id", frame.ID)
c.logger.Warn("unknown frame type", "type", frame.Type, "id", frame.ID)
}
}
@@ -363,6 +378,7 @@ func (c *WSClient) handleResponse(frame wsFrame) {
}
if frame.Error != nil {
// Deliver nil on the pending channel to signal an error; Send converts a
// nil response into an error return for its caller.
ch <- nil
return
}
@@ -386,20 +402,17 @@ func (c *WSClient) handleEvent(frame wsFrame) {
}
}
// ── Send (RPC) ──────────────────────────────────────────────────────────
// ── Send ─────────────────────────────────────────────────────────────────
// Send sends a JSON-RPC request to the gateway and returns the response
// payload. It is safe for concurrent use.
// Send sends a JSON request to the gateway and returns the response payload.
// It is safe for concurrent use. Returns an error if the client is not
// connected.
func (c *WSClient) Send(method string, params any) (json.RawMessage, error) {
reqID := uuid.New().String()
var paramsJSON json.RawMessage
if params != nil {
var err error
paramsJSON, err = json.Marshal(params)
if err != nil {
return nil, fmt.Errorf("marshal params: %w", err)
}
paramsJSON, err := json.Marshal(params)
if err != nil {
return nil, fmt.Errorf("marshal params: %w", err)
}
// Register pending response channel
@@ -423,7 +436,11 @@ func (c *WSClient) Send(method string, params any) (json.RawMessage, error) {
}
c.connMu.Lock()
err := c.conn.WriteJSON(frame)
if c.conn == nil {
c.connMu.Unlock()
return nil, fmt.Errorf("gateway: not connected")
}
err = c.conn.WriteJSON(frame)
c.connMu.Unlock()
if err != nil {
@@ -434,10 +451,10 @@ func (c *WSClient) Send(method string, params any) (json.RawMessage, error) {
select {
case resp := <-respCh:
if resp == nil {
return nil, fmt.Errorf("gateway returned error for request %s (%s)", reqID, method)
return nil, fmt.Errorf("gateway returned error for request %s", reqID)
}
return resp, nil
case <-time.After(30 * time.Second):
return nil, fmt.Errorf("request %s (%s) timed out", reqID, method)
return nil, fmt.Errorf("request %s timed out", reqID)
}
}