// Source: docker-agent/pkg/modelerrors/modelerrors.go (docker/docker-agent, main branch).

// Package modelerrors provides error classification utilities for LLM model
// providers. It determines whether errors are retryable, identifies context
// window overflow conditions, extracts HTTP status codes from various SDK
// error types, and computes exponential backoff durations.
package modelerrors

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"math/rand"
	"net"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/anthropics/anthropic-sdk-go"
	"google.golang.org/genai"
)

// Backoff and retry-after configuration constants.
//
// CalculateBackoff starts from backoffBaseDelay, multiplies by backoffFactor
// once per attempt, caps the result at backoffMaxDelay, and finally applies a
// random jitter of up to ±backoffJitter (expressed as a fraction of the delay).
const (
	backoffBaseDelay = 200 * time.Millisecond
	backoffMaxDelay  = 2 * time.Second
	backoffFactor    = 2.0
	backoffJitter    = 0.1

	// MaxRetryAfterWait caps how long we'll honor a Retry-After header to prevent
	// a misbehaving server from blocking the agent for an unreasonable amount of time.
	// NOTE(review): ParseRetryAfterHeader does not apply this cap itself; the
	// caller that sleeps on the returned duration is expected to clamp it.
	MaxRetryAfterWait = 60 * time.Second
)

// StatusError wraps an HTTP API error with structured metadata for retry decisions.
// Providers wrap SDK errors in this type so the retry loop can use errors.As
// to extract status code and Retry-After without importing provider-specific SDKs.
type StatusError struct {
	// StatusCode is the HTTP status code from the provider's API response.
	StatusCode int
	// RetryAfter is the parsed Retry-After header duration. Zero if absent.
	RetryAfter time.Duration
	// Err is the original error from the provider SDK. It is normally non-nil
	// (WrapHTTPError never produces a nil Err), but the methods below tolerate
	// a nil value because the type is exported and can be built by hand.
	Err error
}

// Error returns the wrapped error's message. When Err is nil it falls back to
// a message built from StatusCode instead of panicking on a nil dereference.
func (e *StatusError) Error() string {
	if e.Err == nil {
		return "http status " + strconv.Itoa(e.StatusCode)
	}
	return e.Err.Error()
}

// Unwrap exposes the wrapped provider error to errors.Is / errors.As.
func (e *StatusError) Unwrap() error {
	return e.Err
}

// WrapHTTPError attaches HTTP metadata to err by returning a *StatusError that
// records statusCode and, when resp is non-nil, the duration parsed from the
// response's Retry-After header. A nil err, or a statusCode below 400, is
// handed back untouched. resp may be nil when no *http.Response is available.
func WrapHTTPError(statusCode int, resp *http.Response, err error) error {
	// Nothing to annotate for successful/redirect statuses or a nil error.
	if statusCode < 400 || err == nil {
		return err
	}

	wait := time.Duration(0)
	if resp != nil {
		wait = ParseRetryAfterHeader(resp.Header.Get("Retry-After"))
	}

	wrapped := &StatusError{Err: err, StatusCode: statusCode, RetryAfter: wait}
	return wrapped
}

// Default fallback configuration.
//
// NOTE(review): neither constant is referenced elsewhere in this file; they
// are presumably consumed by the retry/fallback loop in another package.
const (
	// DefaultRetries is the default number of retries per model with exponential
	// backoff for retryable errors (5xx, timeouts). 2 retries means 3 total attempts.
	// This handles transient provider issues without immediately failing over.
	DefaultRetries = 2

	// DefaultCooldown is the default duration to stick with a fallback model
	// after a non-retryable error before retrying the primary.
	DefaultCooldown = 1 * time.Minute
)

// ContextOverflowError marks a failure as having been caused by the
// conversation outgrowing the model's context window. Wrapping the provider
// error in this type lets the runtime loop react (e.g. by compacting the
// conversation) instead of showing a raw HTTP error to the user.
type ContextOverflowError struct {
	// Underlying is the provider error that revealed the overflow; may be nil.
	Underlying error
}

// Error renders a fixed "context window overflow" summary, followed by the
// underlying error's message when one is present.
func (e *ContextOverflowError) Error() string {
	const summary = "context window overflow"
	if cause := e.Underlying; cause != nil {
		return summary + ": " + cause.Error()
	}
	return summary
}

// Unwrap returns the underlying provider error so errors.Is / errors.As can
// see through the wrapper.
func (e *ContextOverflowError) Unwrap() error {
	return e.Underlying
}

// contextOverflowPatterns contains error message substrings that indicate the
// prompt/context exceeds the model's context window. These patterns are checked
// case-insensitively against error messages from various providers.
//
// Every entry must be written in lower case: IsContextOverflowError lower-cases
// the error message and then does a plain substring search, so an entry
// containing an upper-case letter could never match.
var contextOverflowPatterns = []string{
	"prompt is too long",
	"maximum context length",
	"context length exceeded",
	"context_length_exceeded",
	"max_tokens must be greater than",
	"maximum number of tokens",
	"content length exceeds",
	"request too large",
	"payload too large",
	"input is too long",
	"exceeds the model's max token",
	"token limit",
	"reduce your prompt",
	"reduce the length",
}

// IsContextOverflowError reports whether err signals that the conversation no
// longer fits in the model's context window.
//
// Two checks are made, in order:
//  1. the error chain already contains a *ContextOverflowError;
//  2. the lower-cased error message contains one of contextOverflowPatterns.
//
// Messages this is meant to catch include:
//   - Anthropic 400 "prompt is too long: N tokens > M maximum"
//   - Anthropic 400 "max_tokens must be greater than thinking.budget_tokens"
//     (emitted when the prompt is so large that max_tokens can't accommodate
//     the thinking budget — a proxy for context overflow)
//   - OpenAI 400 "maximum context length" / "context_length_exceeded"
//
// Opaque 5xx responses are deliberately NOT treated as overflow here, even
// though a provider may answer with one when the prompt is too large. Callers
// that want that heuristic must apply it themselves with additional context
// (e.g., session token counts).
func IsContextOverflowError(err error) bool {
	if err == nil {
		return false
	}

	// Fast path: something upstream has already classified the error.
	if _, wrapped := errors.AsType[*ContextOverflowError](err); wrapped {
		return true
	}

	// Slow path: look for a known provider phrase in the message text.
	lowered := strings.ToLower(err.Error())
	for i := range contextOverflowPatterns {
		if strings.Contains(lowered, contextOverflowPatterns[i]) {
			return true
		}
	}
	return false
}

// statusCodeRegex matches HTTP status codes in error messages (e.g., "429", "500", ": 429 ").
// The match is purely lexical: any standalone three-digit number starting with
// 4 or 5 qualifies, so unrelated numbers such as a ":443" port also match.
var statusCodeRegex = regexp.MustCompile(`\b([45]\d{2})\b`)

// ExtractHTTPStatusCode attempts to extract an HTTP status code from the error.
// Checks in order:
//  1. Known provider SDK error types (Anthropic, Gemini)
//  2. Regex parsing of the error message (fallback for OpenAI and others)
//
// In the regex fallback every candidate number in the message is examined and
// the first one that is a recognised HTTP status is returned. Numbers that
// merely look like a status — most commonly the HTTPS port in messages such as
// "dial tcp 1.2.3.4:443: connect: connection refused" — are skipped so that
// network failures are not misclassified as HTTP client errors.
//
// Returns 0 if no status code found.
func ExtractHTTPStatusCode(err error) int {
	if err == nil {
		return 0
	}

	// Check Anthropic SDK error type (public)
	if anthropicErr, ok := errors.AsType[*anthropic.Error](err); ok {
		return anthropicErr.StatusCode
	}

	// Check Google Gemini SDK error type (public)
	if geminiErr, ok := errors.AsType[*genai.APIError](err); ok {
		return geminiErr.Code
	}

	// For other providers (OpenAI, etc.), extract from error message using regex
	// OpenAI SDK error format: `POST "/v1/...": 429 Too Many Requests {...}`
	for _, candidate := range statusCodeRegex.FindAllString(err.Error(), -1) {
		code, convErr := strconv.Atoi(candidate)
		if convErr != nil {
			continue
		}
		if isKnownHTTPStatusCode(code) {
			return code
		}
	}

	return 0
}

// isKnownHTTPStatusCode reports whether code is a status registered in
// net/http, or 529, the non-standard "overloaded" status used by Anthropic.
func isKnownHTTPStatusCode(code int) bool {
	return code == 529 || http.StatusText(code) != ""
}

// IsRetryableStatusCode reports whether an HTTP status code should be retried
// against the SAME model using exponential backoff.
//
// Retryable:
//   - 500, 502, 503, 504 (server errors)
//   - 529 (Anthropic overloaded)
//   - 408 (request timeout)
//
// Everything else is non-retryable and the caller should move on to the next
// model immediately. That notably includes 429 (the provider is explicitly
// rate limiting us) and the other 4xx client errors (400, 401, 403, 404),
// which will not improve on retry.
func IsRetryableStatusCode(statusCode int) bool {
	switch statusCode {
	case 408, 500, 502, 503, 504, 529:
		return true
	}
	// 429 and every other code fall through to here.
	return false
}

// IsRetryableModelError reports whether err warrants another attempt against
// the SAME model (with backoff) rather than skipping to the next model in the
// chain.
//
// The decision is made in this order:
//  1. nil, context.Canceled and context.DeadlineExceeded are not retryable.
//  2. Context overflow (see IsContextOverflowError) is not retryable: the
//     payload does not change between attempts, so it would fail again.
//  3. If ExtractHTTPStatusCode finds a status code, IsRetryableStatusCode
//     decides (5xx/529/408 retry; 429 and other 4xx do not).
//  4. A net.Error that reports Timeout() is retryable.
//  5. A lower-cased message containing one of the retryable substrings
//     (5xx numbers, timeouts, connection problems, overload) is retryable.
//  6. Everything else is not retryable. The explicit non-retryable substrings
//     (rate limiting, auth, invalid request, ...) only select which debug line
//     is logged; the outcome is the same as for an unknown error.
//
// 429 is intentionally treated as non-retryable: it means "you're calling too
// fast", which suggests trying a different model instead of hammering this one.
func IsRetryableModelError(err error) bool {
	switch {
	case err == nil:
		return false
	case errors.Is(err, context.Canceled), errors.Is(err, context.DeadlineExceeded):
		// The caller gave up; retrying would ignore that.
		return false
	case IsContextOverflowError(err):
		slog.Debug("Context overflow error, not retryable", "error", err)
		return false
	}

	// A status code, when one can be found, is the most reliable signal.
	if code := ExtractHTTPStatusCode(err); code != 0 {
		verdict := IsRetryableStatusCode(code)
		slog.Debug("Classified error by status code",
			"status_code", code,
			"retryable", verdict)
		return verdict
	}

	// Network-level timeouts are transient.
	if netErr, ok := errors.AsType[net.Error](err); ok && netErr.Timeout() {
		slog.Debug("Network timeout error, retryable", "error", err)
		return true
	}

	// Last resort: substring matching on the lower-cased message.
	lowered := strings.ToLower(err.Error())
	firstMatch := func(candidates ...string) (string, bool) {
		for _, candidate := range candidates {
			if strings.Contains(lowered, candidate) {
				return candidate, true
			}
		}
		return "", false
	}

	// Retryable patterns (5xx, timeout, network issues).
	// NOTE: 429 is explicitly NOT in this list - rate limits skip to the next model.
	if hit, ok := firstMatch(
		"500",                   // Internal server error
		"502",                   // Bad gateway
		"503",                   // Service unavailable
		"504",                   // Gateway timeout
		"408",                   // Request timeout
		"timeout",               // Generic timeout
		"connection reset",      // Connection reset
		"connection refused",    // Connection refused
		"no such host",          // DNS failure
		"temporary failure",     // Temporary failure
		"service unavailable",   // Service unavailable
		"internal server error", // Server error
		"bad gateway",           // Gateway error
		"gateway timeout",       // Gateway timeout
		"overloaded",            // Server overloaded
		"overloaded_error",      // Server overloaded
		"other side closed",     // Connection closed by peer
		"fetch failed",          // Network fetch failure
		"reset before headers",  // Connection reset before headers received
		"upstream connect",      // Upstream connection error
		"internal_error",        // HTTP/2 INTERNAL_ERROR (stream-level)
	); ok {
		slog.Debug("Matched retryable error pattern", "pattern", hit)
		return true
	}

	// Non-retryable patterns (skip to next model immediately).
	if hit, ok := firstMatch(
		"429",               // Rate limit - skip to next model
		"rate limit",        // Rate limit message
		"too many requests", // Rate limit message
		"throttl",           // Throttling (rate limiting)
		"quota",             // Quota exceeded
		"capacity",          // Capacity issues (often rate-limit related)
		"401",               // Unauthorized
		"403",               // Forbidden
		"404",               // Not found
		"400",               // Bad request
		"invalid",           // Invalid request
		"unauthorized",      // Auth error
		"authentication",    // Auth error
		"api key",           // API key error
	); ok {
		slog.Debug("Matched non-retryable error pattern", "pattern", hit)
		return false
	}

	// Unknown errors are not retried, to be safe.
	slog.Debug("Unknown error type, not retrying", "error", err)
	return false
}

// ParseRetryAfterHeader parses a Retry-After header value.
// Supports both seconds (integer) and HTTP-date formats per RFC 7231 §7.1.3.
// Leading and trailing whitespace is ignored.
//
// Returns 0 if the value is empty, invalid, or results in a non-positive
// duration. A seconds value too large to represent as a time.Duration is
// saturated to the maximum duration rather than being allowed to overflow into
// a negative or meaningless value; callers are expected to clamp the result to
// whatever upper bound they consider reasonable.
func ParseRetryAfterHeader(value string) time.Duration {
	const maxDuration = time.Duration(1<<63 - 1)

	value = strings.TrimSpace(value)
	if value == "" {
		return 0
	}

	// Try integer seconds first (most common for rate limits). On ErrRange
	// ParseInt still returns the clamped extreme, which keeps its sign, so an
	// absurdly large value is handled by the same saturation branch below.
	seconds, err := strconv.ParseInt(value, 10, 64)
	if err == nil || errors.Is(err, strconv.ErrRange) {
		switch {
		case seconds <= 0:
			return 0
		case seconds > int64(maxDuration/time.Second):
			// seconds * time.Second would overflow int64 nanoseconds.
			return maxDuration
		default:
			return time.Duration(seconds) * time.Second
		}
	}

	// Try HTTP-date format; dates in the past yield 0.
	if t, err := http.ParseTime(value); err == nil {
		if d := time.Until(t); d > 0 {
			return d
		}
	}
	return 0
}

// ClassifyModelError sorts an error into one of the outcomes the retry/fallback
// loop cares about.
//
// When the error chain contains a *StatusError (added by provider adapters),
// its StatusCode and RetryAfter fields drive the decision directly, so callers
// need no provider-specific imports. Otherwise the status code is recovered
// with ExtractHTTPStatusCode and, failing that, the verdict is delegated to
// IsRetryableModelError.
//
// Returns:
//   - retryable=true:    retry the SAME model with backoff (5xx, timeouts)
//   - rateLimited=true:  it's a 429 error; caller decides retry vs fallback based on config
//   - retryAfter:        Retry-After duration from the provider (only set for a
//     429 carried by a *StatusError)
//
// retryable is always false when rateLimited is true: the caller chooses
// between retrying (no fallback configured) and skipping to the next model
// (fallbacks available). nil errors, context cancellation/deadline and context
// overflow all classify as (false, false, 0).
func ClassifyModelError(err error) (retryable, rateLimited bool, retryAfter time.Duration) {
	// Cases that must never be retried: nothing failed, the caller gave up, or
	// the same oversized payload would simply fail again.
	if err == nil ||
		errors.Is(err, context.Canceled) ||
		errors.Is(err, context.DeadlineExceeded) ||
		IsContextOverflowError(err) {
		return false, false, 0
	}

	// Primary path: typed StatusError wrapped by provider adapters.
	var wrapped *StatusError
	if errors.As(err, &wrapped) {
		if wrapped.StatusCode != http.StatusTooManyRequests {
			return IsRetryableStatusCode(wrapped.StatusCode), false, 0
		}
		return false, true, wrapped.RetryAfter
	}

	// Fallback: providers that don't yet wrap (e.g. Bedrock), or non-provider
	// errors (network, pattern matching).
	switch code := ExtractHTTPStatusCode(err); code {
	case http.StatusTooManyRequests:
		// No Retry-After is available without a StatusError.
		return false, true, 0
	case 0:
		return IsRetryableModelError(err), false, 0
	default:
		return IsRetryableStatusCode(code), false, 0
	}
}

// CalculateBackoff returns the backoff duration for a given attempt (0-indexed).
// Uses exponential backoff with jitter.
func CalculateBackoff(attempt int) time.Duration {
	if attempt < 0 {
		attempt = 0
	}

	// Calculate exponential delay
	delay := float64(backoffBaseDelay)
	for range attempt {
		delay *= backoffFactor
	}

	// Cap at max delay
	if delay > float64(backoffMaxDelay) {
		delay = float64(backoffMaxDelay)
	}

	// Add jitter (±10%)
	jitter := delay * backoffJitter * (2*rand.Float64() - 1)
	delay += jitter

	return time.Duration(delay)
}

// SleepWithContext blocks for d or until ctx is done, whichever happens first.
// It returns true when the full duration elapsed and false when the wait was
// cut short by the context being cancelled or expiring.
func SleepWithContext(ctx context.Context, d time.Duration) bool {
	wake := time.NewTimer(d)
	defer wake.Stop() // release the timer if we leave early

	completed := false
	select {
	case <-ctx.Done():
		// Interrupted: leave completed as false.
	case <-wake.C:
		completed = true
	}
	return completed
}

// FormatError turns a model error into the text shown to the user.
// A nil error yields the empty string. An error whose chain contains a
// *ContextOverflowError is replaced by a fixed, actionable hint; every other
// error is rendered with its own Error() message.
func FormatError(err error) string {
	if err == nil {
		return ""
	}

	_, overflow := errors.AsType[*ContextOverflowError](err)
	if !overflow {
		return err.Error()
	}

	// Context overflow: tell the user what they can do about it.
	return "The conversation has exceeded the model's context window and automatic compaction is not enabled. " +
		"Try running /compact to reduce the conversation size, or start a new session."
}