Merge pull request 'CUB-200: Implement WebSocket Gateway Client' (!42) from agent/dex/CUB-200-ws-gateway-client into dev
Dev Build & Deploy / test-and-build (push) Failing after 1s
Dev Build & Deploy / docker-build-push (push) Has been skipped

Reviewed-on: #42
This commit was merged in pull request #42.
This commit is contained in:
2026-05-21 06:54:55 -04:00
6 changed files with 320 additions and 66 deletions
+1 -1
View File
@@ -32,7 +32,7 @@ GATEWAY_POLL_INTERVAL=5s
# When using docker-compose, these are set in the services section # When using docker-compose, these are set in the services section
# See docker-compose.yml for service-specific environment variables # See docker-compose.yml for service-specific environment variables
# ── Database Configuration ───────────────────────────────────────────── # ── Database Configuration ───────────────────────────────────────────────
# Set in the db service environment section of docker-compose.yml # Set in the db service environment section of docker-compose.yml
# POSTGRES_USER=controlcenter # POSTGRES_USER=controlcenter
# POSTGRES_PASSWORD=controlcenter # POSTGRES_PASSWORD=controlcenter
+1 -1
View File
@@ -112,7 +112,7 @@ func main() {
<-quit <-quit
slog.Info("shutting down server...") slog.Info("shutting down server...")
cancel() // stop gateway polling cancel() // stop gateway clients
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 15*time.Second) shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 15*time.Second)
defer shutdownCancel() defer shutdownCancel()
+4 -1
View File
@@ -1,6 +1,10 @@
// Package gateway provides an OpenClaw gateway integration client that // Package gateway provides an OpenClaw gateway integration client that
// polls agent states, persists them via the repository layer, and broadcasts // polls agent states, persists them via the repository layer, and broadcasts
// changes through the SSE broker for real-time frontend updates. // changes through the SSE broker for real-time frontend updates.
//
// When a WSClient is wired via SetWSClient, the REST poller becomes a
// fallback: it waits for the WS client to signal readiness, and only starts
// polling if WS fails to connect within 30 seconds.
package gateway package gateway
import ( import (
@@ -140,7 +144,6 @@ func (c *Client) poll(ctx context.Context) {
} }
for _, ga := range agents { for _, ga := range agents {
// Check if agent already exists; if so, update; otherwise create.
existing, err := c.agents.Get(ctx, ga.ID) existing, err := c.agents.Get(ctx, ga.ID)
if err != nil { if err != nil {
// Not found — create it // Not found — create it
+31 -11
View File
@@ -229,7 +229,34 @@ func (c *WSClient) connectAndRun(ctx context.Context) error {
c.connId = helloOK.ConnID c.connId = helloOK.ConnID
c.connMu.Unlock() c.connMu.Unlock()
// Notify REST client that WS is live so it stands down // Step 2b: Register live event handlers BEFORE starting the read
// loop. This eliminates the race window where readLoop dispatches
// live events as "unhandled" because no handlers are registered yet.
// The handlers only depend on c.agents and c.broker, which are wired
// in the constructor — they do not need initialSync to have completed.
c.registerEventHandlers()
// Step 2c: Start the read loop in a goroutine so that Send() in
// initialSync can receive responses. The read loop goroutine will
// continue running after initialSync completes, routing live events
// and any future RPC responses. Because handlers are already
// registered, any events arriving during or after initialSync are
// dispatched correctly.
readLoopErrCh := make(chan error, 1)
go func() {
readLoopErrCh <- c.readLoop(ctx, conn)
}()
// Step 2d: Initial sync — fetch agents + sessions from gateway.
// This works because the read loop is active and will route
// response frames back to Send() via handleResponse.
if err := c.initialSync(ctx); err != nil {
c.logger.Warn("initial sync failed, will continue with read loop", "error", err)
}
// Notify REST client that WS is live so it stands down.
// This must happen AFTER initialSync so that the REST poller
// doesn't start polling while we're still syncing.
if c.restClient != nil { if c.restClient != nil {
c.restClient.MarkWSReady() c.restClient.MarkWSReady()
c.logger.Info("ws client notified REST fallback to stand down") c.logger.Info("ws client notified REST fallback to stand down")
@@ -238,16 +265,9 @@ func (c *WSClient) connectAndRun(ctx context.Context) error {
// Reset wsReadyOnce so MarkWSReady can fire again after a reconnect // Reset wsReadyOnce so MarkWSReady can fire again after a reconnect
c.wsReadyOnce = sync.Once{} c.wsReadyOnce = sync.Once{}
// Step 2b: Initial sync — fetch agents + sessions from gateway // Step 3: Wait for the read loop goroutine to finish (blocks
if err := c.initialSync(ctx); err != nil { // until the connection drops or context is cancelled).
c.logger.Warn("initial sync failed, will continue with read loop", "error", err) return <-readLoopErrCh
}
// Step 2c: Register live event handlers
c.registerEventHandlers()
// Step 3: Read loop
return c.readLoop(ctx, conn)
} }
// readChallenge reads the first frame from the gateway, which must be a // readChallenge reads the first frame from the gateway, which must be a
@@ -11,6 +11,7 @@ import (
"testing" "testing"
"time" "time"
"code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/handler"
"code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/models" "code.cubecraftcreations.com/CubeCraft-Creations/Control-Center/go-backend/internal/models"
"github.com/gorilla/websocket" "github.com/gorilla/websocket"
@@ -466,6 +467,236 @@ func TestAgentItemToCard(t *testing.T) {
}) })
} }
// ── 6. Test: Initial sync ordering (readLoop active before Send) ──────────
// TestConnectAndRun_InitialSyncOrdering verifies that the WS client
// completes initial sync successfully. This test would hang/timeout if
// readLoop were NOT started before initialSync, because Send() relies on
// readLoop→routeFrame→handleResponse to deliver RPC responses.
func TestConnectAndRun_InitialSyncOrdering(t *testing.T) {
repo := &mockAgentRepo{agents: make(map[string]models.AgentCardData)}
broker := handler.NewBroker()
capture := newBroadcastCapture(broker)
defer capture.close()
srv := newTestWSServer(t, func(conn *websocket.Conn) {
// Handshake
handleHandshake(t, conn)
// After handshake, respond to RPCs
for {
var req map[string]any
if err := conn.ReadJSON(&req); err != nil {
break
}
reqID, _ := req["id"].(string)
method, _ := req["method"].(string)
var result any
switch method {
case "agents.list":
result = []map[string]any{
{"id": "otto", "name": "Otto", "role": "Orchestrator", "channel": "discord"},
{"id": "dex", "name": "Dex", "role": "Backend Dev", "channel": "telegram"},
}
case "sessions.list":
result = []map[string]any{
{"sessionKey": "s1", "agentId": "otto", "status": "running", "totalTokens": 500, "lastActivityAt": "2025-05-20T12:00:00Z"},
}
default:
result = map[string]any{}
}
res := map[string]any{
"type": "res",
"id": reqID,
"ok": true,
"result": result,
}
if err := conn.WriteJSON(res); err != nil {
break
}
}
})
defer srv.Close()
client := NewWSClient(WSConfig{URL: wsURL(srv), AuthToken: "test-token"}, repo, broker, slog.Default())
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
done := make(chan struct{})
go func() {
client.Start(ctx)
close(done)
}()
// Wait for initial sync to complete by checking repo state.
// The agents should be persisted from the RPC responses.
deadline := time.Now().Add(5 * time.Second)
for time.Now().Before(deadline) {
repo.mu.Lock()
_, ottoOK := repo.agents["otto"]
_, dexOK := repo.agents["dex"]
repo.mu.Unlock()
if ottoOK && dexOK {
break
}
time.Sleep(50 * time.Millisecond)
}
repo.mu.Lock()
_, ottoOK := repo.agents["otto"]
_, dexOK := repo.agents["dex"]
repo.mu.Unlock()
if !ottoOK {
t.Error("otto not found in repo after initial sync — readLoop may not have been active before Send()")
}
if !dexOK {
t.Error("dex not found in repo after initial sync — readLoop may not have been active before Send()")
}
cancel()
select {
case <-done:
case <-time.After(3 * time.Second):
t.Fatal("WSClient did not shut down cleanly")
}
}
// ── 7. Test: Event not lost during initial sync (regression) ───────────────
// TestConnectAndRun_EventNotLostDuringSync verifies that live gateway events
// arriving during initial sync are NOT dropped. This is a regression test
// for the race where readLoop started before registerEventHandlers(),
// causing events read during that window to be logged as "unhandled" and lost.
//
// The mock server sends a live event (sessions.changed) right after the
// handshake, interleaved with the RPC responses for agents.list and
// sessions.list. The test asserts the event is received by the handler.
func TestConnectAndRun_EventNotLostDuringSync(t *testing.T) {
repo := &mockAgentRepo{agents: make(map[string]models.AgentCardData)}
broker := handler.NewBroker()
capture := newBroadcastCapture(broker)
defer capture.close()
// Pre-seed an agent so the event handler can update it.
repo.agents["otto"] = models.AgentCardData{
ID: "otto",
DisplayName: "Otto",
Status: models.AgentStatusIdle,
}
srv := newTestWSServer(t, func(conn *websocket.Conn) {
// Handshake
handleHandshake(t, conn)
// After handshake, process RPCs and inject a live event.
for {
var req map[string]any
if err := conn.ReadJSON(&req); err != nil {
break
}
reqID, _ := req["id"].(string)
method, _ := req["method"].(string)
// Respond to agents.list RPC
if method == "agents.list" {
// Before responding, inject a live event — simulates
// a gateway pushing a presence update during sync.
evt := map[string]any{
"type": "event",
"event": "presence",
"params": map[string]any{"agentId": "otto", "connected": true, "lastActivityAt": "2025-05-20T12:30:00Z"},
}
if err := conn.WriteJSON(evt); err != nil {
break
}
// Now send the RPC response
res := map[string]any{
"type": "res",
"id": reqID,
"ok": true,
"result": []map[string]any{
{"id": "otto", "name": "Otto", "role": "Orchestrator", "channel": "discord"},
},
}
if err := conn.WriteJSON(res); err != nil {
break
}
continue
}
// Respond to sessions.list RPC
if method == "sessions.list" {
res := map[string]any{
"type": "res",
"id": reqID,
"ok": true,
"result": []map[string]any{},
}
if err := conn.WriteJSON(res); err != nil {
break
}
continue
}
// Default response for other methods
res := map[string]any{
"type": "res",
"id": reqID,
"ok": true,
"result": map[string]any{},
}
if err := conn.WriteJSON(res); err != nil {
break
}
}
})
defer srv.Close()
client := NewWSClient(WSConfig{URL: wsURL(srv), AuthToken: "test-token"}, repo, broker, slog.Default())
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
done := make(chan struct{})
go func() {
client.Start(ctx)
close(done)
}()
// Wait for the presence event to be processed by checking the repo.
// The presence handler updates the agent, so we check for the
// lastActivityAt change.
deadline := time.Now().Add(5 * time.Second)
var lastActivity string
for time.Now().Before(deadline) {
repo.mu.Lock()
if a, ok := repo.agents["otto"]; ok {
lastActivity = a.LastActivity
}
repo.mu.Unlock()
if lastActivity == "2025-05-20T12:30:00Z" {
break
}
time.Sleep(50 * time.Millisecond)
}
if lastActivity != "2025-05-20T12:30:00Z" {
t.Errorf("presence event during sync was lost: lastActivity = %q, want %q", lastActivity, "2025-05-20T12:30:00Z")
}
cancel()
select {
case <-done:
case <-time.After(3 * time.Second):
t.Fatal("WSClient did not shut down cleanly")
}
}
func TestStrPtr(t *testing.T) { func TestStrPtr(t *testing.T) {
s := "hello" s := "hello"
p := strPtr(s) p := strPtr(s)