CUB-203: fix Grimm review blocking issues (PR #41)
Some checks failed
Dev Build / deploy-dev (pull_request) Blocked by required conditions
Dev Build / build-test (pull_request) Waiting to run
Build (Dev) / build-go-backend (pull_request) Failing after 0s
Build (Dev) / trigger-deploy (pull_request) Has been skipped
Build (Dev) / build-frontend (pull_request) Failing after 1s
openclaw/grimm-review All 11 findings resolved. Approved.

🔴 readLoop race: replace WriteControl close with ctx-done goroutine that closes conn
🔴 duplicate event handlers: clear handlers map before re-registering on reconnect
🔴 sync.go CurrentTask abuse: add DisplayName field to UpdateAgentRequest, use it
🔴 sync.go newRole dead code: add Role field to UpdateAgentRequest, use it
🔴 events.go handlePresence DB/SSE inconsistency: pass LastActivityAt in update, don't mutate after DB
🔴 events.go handleAgentConfig DB/SSE inconsistency: use DisplayName/Role fields in update
🟠 Send() nil-conn panic: check conn != nil before WriteJSON
🟠 readLoop prompt ctx cancellation: fixed by item #1
🟠 backoff never resets: reset to initialBackoff after successful connectAndRun
🟠 MarkWSReady double-close race: use sync.Once in Client
Extra json:"-" dead fields: removed from sessionChangedPayload, presencePayload, agentConfigPayload
UpdateAgentRequest: added DisplayName, Role, LastActivityAt fields
This commit is contained in:
Dex
2026-05-20 11:47:11 +00:00
parent 7a93d43b7e
commit 4569fef11d
6 changed files with 462 additions and 64 deletions

View File

@@ -55,6 +55,7 @@ type WSClient struct {
handlers map[string][]eventHandler
connId string // set after successful hello-ok
restClient *Client // optional REST client to notify on WS ready
wsReadyOnce sync.Once // ensures MarkWSReady close is one-shot
}
// NewWSClient returns a WSClient wired to the given repository and broker.
@@ -142,8 +143,9 @@ type helloOKResponse struct {
// read loop. On disconnect it reconnects with exponential backoff. On
// ctx cancellation it performs a clean shutdown.
func (c *WSClient) Start(ctx context.Context) {
backoff := 1 * time.Second
initialBackoff := 1 * time.Second
maxBackoff := 30 * time.Second
backoff := initialBackoff
for {
err := c.connectAndRun(ctx)
@@ -155,6 +157,9 @@ func (c *WSClient) Start(ctx context.Context) {
c.logger.Warn("ws client disconnected, reconnecting",
"error", err,
"backoff", backoff)
} else {
// Reset backoff on successful connect+run completion
backoff = initialBackoff
}
select {
@@ -189,7 +194,16 @@ func (c *WSClient) connectAndRun(ctx context.Context) error {
c.conn = conn
c.connMu.Unlock()
// Reset backoff on successful connect
// When context is cancelled, close the conn to unblock ReadJSON in readLoop.
go func() {
<-ctx.Done()
c.connMu.Lock()
if c.conn != nil {
c.conn.Close()
}
c.connMu.Unlock()
}()
defer func() {
conn.Close()
}()
@@ -221,6 +235,9 @@ func (c *WSClient) connectAndRun(ctx context.Context) error {
c.logger.Info("ws client notified REST fallback to stand down")
}
// Reset wsReadyOnce so MarkWSReady can fire again after a reconnect
c.wsReadyOnce = sync.Once{}
// Step 2b: Initial sync — fetch agents + sessions from gateway
if err := c.initialSync(ctx); err != nil {
c.logger.Warn("initial sync failed, will continue with read loop", "error", err)
@@ -309,25 +326,15 @@ func (c *WSClient) sendConnect(conn *websocket.Conn) (*helloOKResponse, error) {
}
// readLoop continuously reads frames from the connection and routes them.
// It returns on read error or context cancellation.
// It returns on read error or when the connection is closed by the ctx-done
// goroutine started in connectAndRun.
func (c *WSClient) readLoop(ctx context.Context, conn *websocket.Conn) error {
for {
select {
case <-ctx.Done():
// Clean shutdown: send close frame
c.connMu.Lock()
c.conn.WriteControl(
websocket.CloseMessage,
websocket.FormatCloseMessage(websocket.CloseNormalClosure, "shutdown"),
time.Now().Add(5*time.Second),
)
c.connMu.Unlock()
return ctx.Err()
default:
}
var frame wsFrame
if err := conn.ReadJSON(&frame); err != nil {
if ctx.Err() != nil {
return ctx.Err()
}
// Check if it's a close error
if websocket.IsCloseError(err, websocket.CloseNormalClosure, websocket.CloseGoingAway) {
c.logger.Info("ws connection closed by server")
@@ -398,9 +405,8 @@ func (c *WSClient) handleEvent(frame wsFrame) {
// ── Send ─────────────────────────────────────────────────────────────────
// Send sends a JSON request to the gateway and returns the response payload.
// It is safe for concurrent use. The caller should check for errors in the
// returned payload. A nil payload with nil error means the gateway sent an
// error response (check via the response frame's error field, which is logged).
// It is safe for concurrent use. Returns an error if the client is not
// connected.
func (c *WSClient) Send(method string, params any) (json.RawMessage, error) {
reqID := uuid.New().String()
@@ -430,6 +436,10 @@ func (c *WSClient) Send(method string, params any) (json.RawMessage, error) {
}
c.connMu.Lock()
if c.conn == nil {
c.connMu.Unlock()
return nil, fmt.Errorf("gateway: not connected")
}
err = c.conn.WriteJSON(frame)
c.connMu.Unlock()