CUB-200: Implement WebSocket Gateway Client #42
@@ -229,26 +229,31 @@ func (c *WSClient) connectAndRun(ctx context.Context) error {
|
|||||||
c.connId = helloOK.ConnID
|
c.connId = helloOK.ConnID
|
||||||
c.connMu.Unlock()
|
c.connMu.Unlock()
|
||||||
|
|
||||||
// Step 2b: Start the read loop in a goroutine so that Send() in
|
// Step 2b: Register live event handlers BEFORE starting the read
|
||||||
|
// loop. This eliminates the race window where readLoop dispatches
|
||||||
|
// live events as "unhandled" because no handlers are registered yet.
|
||||||
|
// The handlers only depend on c.agents and c.broker, which are wired
|
||||||
|
// in the constructor — they do not need initialSync to have completed.
|
||||||
|
c.registerEventHandlers()
|
||||||
|
|
||||||
|
// Step 2c: Start the read loop in a goroutine so that Send() in
|
||||||
// initialSync can receive responses. The read loop goroutine will
|
// initialSync can receive responses. The read loop goroutine will
|
||||||
// continue running after initialSync completes, routing live events
|
// continue running after initialSync completes, routing live events
|
||||||
// and any future RPC responses.
|
// and any future RPC responses. Because handlers are already
|
||||||
|
// registered, any events arriving during or after initialSync are
|
||||||
|
// dispatched correctly.
|
||||||
readLoopErrCh := make(chan error, 1)
|
readLoopErrCh := make(chan error, 1)
|
||||||
go func() {
|
go func() {
|
||||||
readLoopErrCh <- c.readLoop(ctx, conn)
|
readLoopErrCh <- c.readLoop(ctx, conn)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Step 2c: Initial sync — fetch agents + sessions from gateway.
|
// Step 2d: Initial sync — fetch agents + sessions from gateway.
|
||||||
// This now works because the read loop is active and will route
|
// This works because the read loop is active and will route
|
||||||
// response frames back to Send() via handleResponse.
|
// response frames back to Send() via handleResponse.
|
||||||
if err := c.initialSync(ctx); err != nil {
|
if err := c.initialSync(ctx); err != nil {
|
||||||
c.logger.Warn("initial sync failed, will continue with read loop", "error", err)
|
c.logger.Warn("initial sync failed, will continue with read loop", "error", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 2d: Register live event handlers (read loop is already
|
|
||||||
// active, so events will be dispatched immediately)
|
|
||||||
c.registerEventHandlers()
|
|
||||||
|
|
||||||
// Notify REST client that WS is live so it stands down.
|
// Notify REST client that WS is live so it stands down.
|
||||||
// This must happen AFTER initialSync so that the REST poller
|
// This must happen AFTER initialSync so that the REST poller
|
||||||
// doesn't start polling while we're still syncing.
|
// doesn't start polling while we're still syncing.
|
||||||
|
|||||||
@@ -565,6 +565,138 @@ func TestConnectAndRun_InitialSyncOrdering(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── 7. Test: Event not lost during initial sync (regression) ───────────────
|
||||||
|
|
||||||
|
// TestConnectAndRun_EventNotLostDuringSync verifies that live gateway events
|
||||||
|
// arriving during initial sync are NOT dropped. This is a regression test
|
||||||
|
// for the race where readLoop started before registerEventHandlers(),
|
||||||
|
// causing events read during that window to be logged as "unhandled" and lost.
|
||||||
|
//
|
||||||
|
// The mock server sends a live event (presence) right after the
|
||||||
|
// handshake, interleaved with the RPC responses for agents.list and
|
||||||
|
// sessions.list. The test asserts the event was applied to the agent repo.
|
||||||
|
func TestConnectAndRun_EventNotLostDuringSync(t *testing.T) {
|
||||||
|
repo := &mockAgentRepo{agents: make(map[string]models.AgentCardData)}
|
||||||
|
broker := handler.NewBroker()
|
||||||
|
capture := newBroadcastCapture(broker)
|
||||||
|
defer capture.close()
|
||||||
|
|
||||||
|
// Pre-seed an agent so the event handler can update it.
|
||||||
|
repo.agents["otto"] = models.AgentCardData{
|
||||||
|
ID: "otto",
|
||||||
|
DisplayName: "Otto",
|
||||||
|
Status: models.AgentStatusIdle,
|
||||||
|
}
|
||||||
|
|
||||||
|
srv := newTestWSServer(t, func(conn *websocket.Conn) {
|
||||||
|
// Handshake
|
||||||
|
handleHandshake(t, conn)
|
||||||
|
|
||||||
|
// After handshake, process RPCs and inject a live event.
|
||||||
|
for {
|
||||||
|
var req map[string]any
|
||||||
|
if err := conn.ReadJSON(&req); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
reqID, _ := req["id"].(string)
|
||||||
|
method, _ := req["method"].(string)
|
||||||
|
|
||||||
|
// Respond to agents.list RPC
|
||||||
|
if method == "agents.list" {
|
||||||
|
// Before responding, inject a live event — simulates
|
||||||
|
// a gateway pushing a presence update during sync.
|
||||||
|
evt := map[string]any{
|
||||||
|
"type": "event",
|
||||||
|
"event": "presence",
|
||||||
|
"params": map[string]any{"agentId": "otto", "connected": true, "lastActivityAt": "2025-05-20T12:30:00Z"},
|
||||||
|
}
|
||||||
|
if err := conn.WriteJSON(evt); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now send the RPC response
|
||||||
|
res := map[string]any{
|
||||||
|
"type": "res",
|
||||||
|
"id": reqID,
|
||||||
|
"ok": true,
|
||||||
|
"result": []map[string]any{
|
||||||
|
{"id": "otto", "name": "Otto", "role": "Orchestrator", "channel": "discord"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if err := conn.WriteJSON(res); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Respond to sessions.list RPC
|
||||||
|
if method == "sessions.list" {
|
||||||
|
res := map[string]any{
|
||||||
|
"type": "res",
|
||||||
|
"id": reqID,
|
||||||
|
"ok": true,
|
||||||
|
"result": []map[string]any{},
|
||||||
|
}
|
||||||
|
if err := conn.WriteJSON(res); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default response for other methods
|
||||||
|
res := map[string]any{
|
||||||
|
"type": "res",
|
||||||
|
"id": reqID,
|
||||||
|
"ok": true,
|
||||||
|
"result": map[string]any{},
|
||||||
|
}
|
||||||
|
if err := conn.WriteJSON(res); err != nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
client := NewWSClient(WSConfig{URL: wsURL(srv), AuthToken: "test-token"}, repo, broker, slog.Default())
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
client.Start(ctx)
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Wait for the presence event to be processed by checking the repo.
|
||||||
|
// The presence handler updates the agent, so we check for the
|
||||||
|
// lastActivityAt change.
|
||||||
|
deadline := time.Now().Add(5 * time.Second)
|
||||||
|
var lastActivity string
|
||||||
|
for time.Now().Before(deadline) {
|
||||||
|
repo.mu.Lock()
|
||||||
|
if a, ok := repo.agents["otto"]; ok {
|
||||||
|
lastActivity = a.LastActivity
|
||||||
|
}
|
||||||
|
repo.mu.Unlock()
|
||||||
|
if lastActivity == "2025-05-20T12:30:00Z" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
time.Sleep(50 * time.Millisecond)
|
||||||
|
}
|
||||||
|
|
||||||
|
if lastActivity != "2025-05-20T12:30:00Z" {
|
||||||
|
t.Errorf("presence event during sync was lost: lastActivity = %q, want %q", lastActivity, "2025-05-20T12:30:00Z")
|
||||||
|
}
|
||||||
|
|
||||||
|
cancel()
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
case <-time.After(3 * time.Second):
|
||||||
|
t.Fatal("WSClient did not shut down cleanly")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestStrPtr(t *testing.T) {
|
func TestStrPtr(t *testing.T) {
|
||||||
s := "hello"
|
s := "hello"
|
||||||
p := strPtr(s)
|
p := strPtr(s)
|
||||||
|
|||||||
Reference in New Issue
Block a user