ndycode · ndycode · Mar 4, 2026 · Mar 5, 2026 · Mar 5, 2026 · graphite-app
@@ -62,6 +62,7 @@ Used only for host plugin mode through the host runtime config file.
 | `retryAllAccountsRateLimited` | `true` |
 | `retryAllAccountsMaxWaitMs` | `0` |
 | `retryAllAccountsMaxRetries` | `Infinity` |
+| `retryAllAccountsAbsoluteCeilingMs` | `0 ms (0–24h; 0 = unlimited)` |
 | `unsupportedCodexPolicy` | `strict` |
 | `fallbackOnUnsupportedCodexModel` | `false` |
 | `fallbackToGpt52OnUnsupportedGpt53` | `true` |
@@ -193,6 +194,7 @@ Used only for host plugin mode through the host runtime config file.
 | `CODEX_TUI_V2` | Toggle TUI v2 |
 | `CODEX_TUI_COLOR_PROFILE` | TUI color profile |
 | `CODEX_TUI_GLYPHS` | TUI glyph mode |
+| `CODEX_AUTH_RETRY_ALL_ABSOLUTE_CEILING_MS` | Absolute wait ceiling in ms for retry-all-on-rate-limit loop (`0–24h`, `0 = unlimited`) |
 | `CODEX_AUTH_FETCH_TIMEOUT_MS` | Request timeout override |
 | `CODEX_AUTH_STREAM_STALL_TIMEOUT_MS` | Stream stall timeout override |
 | `CODEX_MULTI_AUTH_SYNC_CODEX_CLI` | Toggle Codex CLI state sync |

@@ -86,6 +86,8 @@ Examples:
 - `retryAllAccountsRateLimited`
 - `retryAllAccountsMaxWaitMs`
 - `retryAllAccountsMaxRetries`
+- `retryAllAccountsAbsoluteCeilingMs`
+  Unit: milliseconds. Bounds: `0` to `86400000` (24h). `0` means unlimited.
 
 ### Refresh and Recovery
 
@@ -126,6 +128,8 @@ Common operator overrides:
 - `CODEX_TUI_V2`
 - `CODEX_TUI_COLOR_PROFILE`
 - `CODEX_TUI_GLYPHS`
+- `CODEX_AUTH_RETRY_ALL_ABSOLUTE_CEILING_MS`
+  Environment override for the Rotation & Quota setting `retryAllAccountsAbsoluteCeilingMs` (milliseconds, `0` to `86400000` (24h); `0` = unlimited).
 - `CODEX_AUTH_FETCH_TIMEOUT_MS`
 - `CODEX_AUTH_STREAM_STALL_TIMEOUT_MS`
 
@@ -175,4 +179,4 @@ codex auth forecast --live
 
 - [commands.md](commands.md)
 - [storage-paths.md](storage-paths.md)
-- [../configuration.md](../configuration.md)
+- [../configuration.md](../configuration.md)
@@ -44,6 +44,7 @@ import {
 	getFastSessionMaxInputItems,
 	getRateLimitToastDebounceMs,
 	getRetryAllAccountsMaxRetries,
+	getRetryAllAccountsAbsoluteCeilingMs,
 	getRetryAllAccountsMaxWaitMs,
 	getRetryAllAccountsRateLimited,
 	getFallbackToGpt52OnUnsupportedGpt53,
@@ -156,6 +157,10 @@ import {
 } from "./lib/request/rate-limit-backoff.js";
 import { isEmptyResponse } from "./lib/request/response-handler.js";
 import { addJitter } from "./lib/rotation.js";
+import {
+	decideRetryAllAccountsRateLimited,
+	type RetryAllAccountsRateLimitDecisionReason,
+} from "./lib/request/retry-governor.js";
 import { SessionAffinityStore } from "./lib/session-affinity.js";
 import { LiveAccountSync } from "./lib/live-account-sync.js";
 import { RefreshGuardian } from "./lib/refresh-guardian.js";
@@ -344,6 +349,9 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
 		streamFailoverAttempts: number;
 		streamFailoverRecoveries: number;
 		streamFailoverCrossAccountRecoveries: number;
+		retryGovernorStopsWaitExceedsMax: number;
+		retryGovernorStopsRetryLimitReached: number;
+		retryGovernorStopsAbsoluteCeilingExceeded: number;
 		cumulativeLatencyMs: number;
 		lastRequestAt: number | null;
 		lastError: string | null;
@@ -365,11 +373,32 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
 		streamFailoverAttempts: 0,
 		streamFailoverRecoveries: 0,
 		streamFailoverCrossAccountRecoveries: 0,
+		retryGovernorStopsWaitExceedsMax: 0,
+		retryGovernorStopsRetryLimitReached: 0,
+		retryGovernorStopsAbsoluteCeilingExceeded: 0,
 		cumulativeLatencyMs: 0,
 		lastRequestAt: null,
 		lastError: null,
 	};
 
+	const recordRetryGovernorStopReason = ( // Increments the runtimeMetrics counter that matches a retry-governor stop reason.
+		reason: RetryAllAccountsRateLimitDecisionReason, // reason reported by the retry governor's decision
+	): void => {
+		switch (reason) {
+			case "wait-exceeds-max":
+				runtimeMetrics.retryGovernorStopsWaitExceedsMax += 1;
+				return;
+			case "retry-limit-reached":
+				runtimeMetrics.retryGovernorStopsRetryLimitReached += 1;
+				return;
+			case "absolute-ceiling-exceeded":
+				runtimeMetrics.retryGovernorStopsAbsoluteCeilingExceeded += 1;
+				return;
+			default: // remaining reasons ("allowed", "disabled", "no-accounts", "no-wait") have no counter
+				return;
+		}
+	};
+
         type TokenSuccess = Extract<TokenResult, { type: "success" }>;
         type TokenSuccessWithAccount = TokenSuccess & {
                 accountIdOverride?: string;
@@ -1124,6 +1153,8 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
 				const retryAllAccountsRateLimited = getRetryAllAccountsRateLimited(pluginConfig);
 				const retryAllAccountsMaxWaitMs = getRetryAllAccountsMaxWaitMs(pluginConfig);
 				const retryAllAccountsMaxRetries = getRetryAllAccountsMaxRetries(pluginConfig);
+				const retryAllAccountsAbsoluteCeilingMs =
+					getRetryAllAccountsAbsoluteCeilingMs(pluginConfig);
 				const unsupportedCodexPolicy = getUnsupportedCodexPolicy(pluginConfig);
 				const fallbackOnUnsupportedCodexModel = unsupportedCodexPolicy === "fallback";
 				const fallbackToGpt52OnUnsupportedGpt53 =
@@ -1397,6 +1428,7 @@ export const OpenAIOAuthPlugin: Plugin = async ({ client }: PluginInput) => {
 					};
 
 							let allRateLimitedRetries = 0;
+							let accumulatedAllRateLimitedWaitMs = 0;
 							let emptyResponseRetries = 0;
 							const attemptedUnsupportedFallbackModels = new Set<string>();
 							if (model) {
@@ -2368,20 +2400,53 @@ while (attempted.size < Math.max(1, accountCount)) {
 
 										const waitMs = accountManager.getMinWaitTimeForFamily(modelFamily, model);
 										const count = accountManager.getAccountCount();
+								const jitteredWaitMs = waitMs > 0 ? addJitter(waitMs, 0.2) : 0;
+								const plannedWaitMs =
+									retryAllAccountsAbsoluteCeilingMs > 0
+										? Math.min(
+												jitteredWaitMs,
+												Math.max(
+													0,
+													retryAllAccountsAbsoluteCeilingMs - accumulatedAllRateLimitedWaitMs,
+												),
+											)
+										: jitteredWaitMs;
+								const retryDecision = decideRetryAllAccountsRateLimited({
+									enabled: retryAllAccountsRateLimited,
+									accountCount: count,
+									waitMs: plannedWaitMs,
+									maxWaitMs: retryAllAccountsMaxWaitMs,
+									currentRetryCount: allRateLimitedRetries,
+									maxRetries: retryAllAccountsMaxRetries,
+									accumulatedWaitMs: accumulatedAllRateLimitedWaitMs,
+									absoluteCeilingMs: retryAllAccountsAbsoluteCeilingMs,
+								});
 
-								if (
-									retryAllAccountsRateLimited &&
-									count > 0 &&
-									waitMs > 0 &&
-									(retryAllAccountsMaxWaitMs === 0 ||
-										waitMs <= retryAllAccountsMaxWaitMs) &&
-									allRateLimitedRetries < retryAllAccountsMaxRetries
-								) {
+								if (retryDecision.shouldRetry) {
 									const countdownMessage = `All ${count} account(s) rate-limited. Waiting`;
-									await sleepWithCountdown(addJitter(waitMs, 0.2), countdownMessage);
+									await sleepWithCountdown(plannedWaitMs, countdownMessage);
 									allRateLimitedRetries++;
+									accumulatedAllRateLimitedWaitMs += plannedWaitMs;
 									continue;
 								}
+								recordRetryGovernorStopReason(retryDecision.reason);
+								if (
+									retryDecision.reason !== "disabled" &&
+									retryDecision.reason !== "no-accounts" &&
+									retryDecision.reason !== "no-wait"
+								) {
+									logDebug("Retry governor blocked all-rate-limited retry", {
+										reason: retryDecision.reason,
+										accountCount: count,
+										waitMs,
+										plannedWaitMs,
+										retryCount: allRateLimitedRetries,
+										accumulatedWaitMs: accumulatedAllRateLimitedWaitMs,
+										maxWaitMs: retryAllAccountsMaxWaitMs,
+										maxRetries: retryAllAccountsMaxRetries,
+										absoluteCeilingMs: retryAllAccountsAbsoluteCeilingMs,
+									});
+								}
 
 								const waitLabel = waitMs > 0 ? formatWaitTime(waitMs) : "a bit";
 								const message =
@@ -3763,6 +3828,9 @@ while (attempted.size < Math.max(1, accountCount)) {
 						`Stream failover attempts: ${runtimeMetrics.streamFailoverAttempts}`,
 						`Stream failover recoveries: ${runtimeMetrics.streamFailoverRecoveries}`,
 						`Stream failover cross-account recoveries: ${runtimeMetrics.streamFailoverCrossAccountRecoveries}`,
+						`Retry governor stops (wait>max): ${runtimeMetrics.retryGovernorStopsWaitExceedsMax}`,
+						`Retry governor stops (retry limit): ${runtimeMetrics.retryGovernorStopsRetryLimitReached}`,
+						`Retry governor stops (absolute ceiling): ${runtimeMetrics.retryGovernorStopsAbsoluteCeilingExceeded}`,
 						`Empty-response retries: ${runtimeMetrics.emptyResponseRetries}`,
 						`Session affinity entries: ${sessionAffinityEntries}`,
 						`Live sync: ${liveSyncSnapshot?.running ? "on" : "off"} (${liveSyncSnapshot?.reloadCount ?? 0} reloads)`,
@@ -3798,6 +3866,24 @@ while (attempted.size < Math.max(1, accountCount)) {
 								String(runtimeMetrics.streamFailoverCrossAccountRecoveries),
 								"accent",
 							),
+							formatUiKeyValue(
+								ui,
+								"Retry governor stops (wait>max)",
+								String(runtimeMetrics.retryGovernorStopsWaitExceedsMax),
+								"warning",
+							),
+							formatUiKeyValue(
+								ui,
+								"Retry governor stops (retry limit)",
+								String(runtimeMetrics.retryGovernorStopsRetryLimitReached),
+								"warning",
+							),
+							formatUiKeyValue(
+								ui,
+								"Retry governor stops (absolute ceiling)",
+								String(runtimeMetrics.retryGovernorStopsAbsoluteCeilingExceeded),
+								"warning",
+							),
 							formatUiKeyValue(ui, "Empty-response retries", String(runtimeMetrics.emptyResponseRetries), "warning"),
 							formatUiKeyValue(ui, "Session affinity entries", String(sessionAffinityEntries), "muted"),
 							formatUiKeyValue(

@@ -185,6 +185,7 @@ type BackendNumberSettingKey =
 	| "proactiveRefreshBufferMs"
 	| "parallelProbingMaxConcurrency"
 	| "fastSessionMaxInputItems"
+	| "retryAllAccountsAbsoluteCeilingMs"
 	| "networkErrorCooldownMs"
 	| "serverErrorCooldownMs"
 	| "fetchTimeoutMs"
@@ -377,6 +378,15 @@ const BACKEND_NUMBER_OPTIONS: BackendNumberSettingOption[] = [
 		step: 2,
 		unit: "count",
 	},
+	{
+		key: "retryAllAccountsAbsoluteCeilingMs",
+		label: "Retry-All Absolute Wait Ceiling",
+		description: "Total max wait for retry-all-on-rate-limit. Set 0 for unlimited.",
+		min: 0,
+		max: 24 * 60 * 60_000,
+		step: 30_000,
+		unit: "ms",
+	},
 	{
 		key: "networkErrorCooldownMs",
 		label: "Network Error Cooldown",
@@ -486,6 +496,7 @@ const BACKEND_CATEGORY_OPTIONS: BackendCategoryOption[] = [
 			"preemptiveQuotaRemainingPercent5h",
 			"preemptiveQuotaRemainingPercent7d",
 			"preemptiveQuotaMaxDeferralMs",
+			"retryAllAccountsAbsoluteCeilingMs",
 		],
 	},
 	{
@@ -974,8 +985,20 @@ function buildBackendSettingsPreview(
 		config.preemptiveQuotaRemainingPercent7d ??
 		BACKEND_DEFAULTS.preemptiveQuotaRemainingPercent7d ??
 		5;
+	const retryAllAbsoluteCeilingMs =
+		config.retryAllAccountsAbsoluteCeilingMs ??
+		BACKEND_DEFAULTS.retryAllAccountsAbsoluteCeilingMs ??
+		0;
 	const fetchTimeout = config.fetchTimeoutMs ?? BACKEND_DEFAULTS.fetchTimeoutMs ?? 60_000;
 	const stallTimeout = config.streamStallTimeoutMs ?? BACKEND_DEFAULTS.streamStallTimeoutMs ?? 45_000;
+	const retryAllAbsoluteCeilingOption = BACKEND_NUMBER_OPTION_BY_KEY.get(
+		"retryAllAccountsAbsoluteCeilingMs",
+	);
+	const retryCeilingLabel = retryAllAbsoluteCeilingMs === 0
+		? "unlimited"
+		: retryAllAbsoluteCeilingOption
+			? formatBackendNumberValue(retryAllAbsoluteCeilingOption, retryAllAbsoluteCeilingMs)
+			: `${retryAllAbsoluteCeilingMs}ms`;
 	const fetchTimeoutOption = BACKEND_NUMBER_OPTION_BY_KEY.get("fetchTimeoutMs");
 	const stallTimeoutOption = BACKEND_NUMBER_OPTION_BY_KEY.get("streamStallTimeoutMs");
 
@@ -993,6 +1016,7 @@ function buildBackendSettingsPreview(
 	const hint = [
 		`thresholds 5h<=${highlightIfFocused("preemptiveQuotaRemainingPercent5h", `${threshold5h}%`)}`,
 		`7d<=${highlightIfFocused("preemptiveQuotaRemainingPercent7d", `${threshold7d}%`)}`,
+		`retry ceiling ${highlightIfFocused("retryAllAccountsAbsoluteCeilingMs", retryCeilingLabel)}`,
 		`timeouts ${highlightIfFocused("fetchTimeoutMs", fetchTimeoutOption ? formatBackendNumberValue(fetchTimeoutOption, fetchTimeout) : `${fetchTimeout}ms`)}/${highlightIfFocused("streamStallTimeoutMs", stallTimeoutOption ? formatBackendNumberValue(stallTimeoutOption, stallTimeout) : `${stallTimeout}ms`)}`,
 	].join(" | ");
 
@@ -1069,6 +1093,10 @@ function clampBackendNumberForTests(settingKey: string, value: number): number {
 	return clampBackendNumber(option, value);
 }
 
+function buildBackendSettingsPreviewForTests(config: PluginConfig): { label: string; hint: string } { // Test-only wrapper around buildBackendSettingsPreview.
+	return buildBackendSettingsPreview(config, getUiRuntimeOptions()); // supplies the UI options argument from getUiRuntimeOptions() so tests pass only a config
+}
+
 async function withQueuedRetryForTests<T>(
 	pathKey: string,
 	task: () => Promise<T>,
@@ -1093,6 +1121,7 @@ async function persistBackendConfigSelectionForTests(
 
 const __testOnly = {
 	clampBackendNumber: clampBackendNumberForTests,
+	buildBackendSettingsPreview: buildBackendSettingsPreviewForTests,
 	formatMenuLayoutMode,
 	cloneDashboardSettings,
 	withQueuedRetry: withQueuedRetryForTests,

@@ -125,6 +125,7 @@ export const DEFAULT_PLUGIN_CONFIG: PluginConfig = {
 	retryAllAccountsRateLimited: true,
 	retryAllAccountsMaxWaitMs: 0,
 	retryAllAccountsMaxRetries: Infinity,
+	retryAllAccountsAbsoluteCeilingMs: 0,
 	unsupportedCodexPolicy: "strict",
 	fallbackOnUnsupportedCodexModel: false,
 	fallbackToGpt52OnUnsupportedGpt53: true,
@@ -591,6 +592,15 @@ export function getRetryAllAccountsMaxRetries(pluginConfig: PluginConfig): numbe
 	);
 }
 
+export function getRetryAllAccountsAbsoluteCeilingMs(pluginConfig: PluginConfig): number { // Resolves the retry-all absolute wait ceiling, in milliseconds.
+	return resolveNumberSetting(
+		"CODEX_AUTH_RETRY_ALL_ABSOLUTE_CEILING_MS", // environment variable name handed to the resolver
+		pluginConfig.retryAllAccountsAbsoluteCeilingMs, // value from the plugin config
+		0, // NOTE(review): presumably the fallback default (matches DEFAULT_PLUGIN_CONFIG's 0) — confirm against resolveNumberSetting
+		{ min: 0, max: 24 * 60 * 60_000 }, // bounds passed to the resolver: 0 .. 86_400_000 ms (24 h)
+	);
+}
+
 export function getUnsupportedCodexPolicy(
 	pluginConfig: PluginConfig,
 ): UnsupportedCodexPolicy {

@@ -0,0 +1,72 @@
+export interface RetryAllAccountsRateLimitDecisionInput { // Inputs read by decideRetryAllAccountsRateLimited.
+	enabled: boolean; // false yields the "disabled" decision
+	accountCount: number; // 0 (after clamping) yields "no-accounts"
+	waitMs: number; // wait considered for this retry, in ms; 0 yields "no-wait"
+	maxWaitMs: number; // cap on a single wait, in ms; 0 turns the cap off
+	currentRetryCount: number; // retries already performed; stops when >= maxRetries
+	maxRetries: number; // retry limit; a non-finite value means no limit
+	accumulatedWaitMs: number; // wait already spent, in ms; added to waitMs for the ceiling check
+	absoluteCeilingMs: number; // ceiling on accumulatedWaitMs + waitMs, in ms; 0 turns the ceiling off
+}
+
+export type RetryAllAccountsRateLimitDecisionReason = // Why decideRetryAllAccountsRateLimited allowed or refused a retry.
+	| "allowed" // the only reason paired with shouldRetry: true
+	| "disabled" // input.enabled was false
+	| "no-accounts" // accountCount clamped to 0
+	| "no-wait" // waitMs clamped to 0
+	| "wait-exceeds-max" // maxWaitMs > 0 and waitMs > maxWaitMs
+	| "retry-limit-reached" // currentRetryCount >= maxRetries
+	| "absolute-ceiling-exceeded"; // absoluteCeilingMs > 0 and accumulatedWaitMs + waitMs > absoluteCeilingMs
+
+export interface RetryAllAccountsRateLimitDecision { // Result returned by decideRetryAllAccountsRateLimited.
+	shouldRetry: boolean; // true only when reason is "allowed"
+	reason: RetryAllAccountsRateLimitDecisionReason; // which check produced the decision
+}
+
+function clampNonNegative(value: number): number { // Normalizes a number to a non-negative integer.
+	if (!Number.isFinite(value)) return 0; // NaN and +/-Infinity collapse to 0
+	return Math.max(0, Math.floor(value)); // floor first, then lift negatives to 0
+}
+
+function normalizeRetryLimit(value: number): number { // Normalizes a retry limit, keeping "unlimited" representable.
+	if (!Number.isFinite(value)) return Number.POSITIVE_INFINITY; // any non-finite input (NaN included) means no retry limit
+	return clampNonNegative(value); // finite limits become non-negative integers
+}
+
+/**
+ * Decide whether the "retry all accounts when rate-limited" loop should wait and retry once more.
+ * Numeric inputs are floored and clamped to >= 0; a non-finite `maxRetries` means no retry limit.
+ * Checks run in order: disabled, no-accounts, no-wait, wait-exceeds-max, retry-limit-reached, absolute-ceiling-exceeded.
+ * This helper is pure and deterministic so retry behavior can be tested without the full request pipeline.
+ */
+export function decideRetryAllAccountsRateLimited(
+	input: RetryAllAccountsRateLimitDecisionInput,
+): RetryAllAccountsRateLimitDecision {
+	const accountCount = clampNonNegative(input.accountCount);
+	const waitMs = clampNonNegative(input.waitMs);
+	const maxWaitMs = clampNonNegative(input.maxWaitMs);
+	const currentRetryCount = clampNonNegative(input.currentRetryCount);
+	const maxRetries = normalizeRetryLimit(input.maxRetries); // keeps Infinity as "no limit" instead of clamping it to 0
+	const accumulatedWaitMs = clampNonNegative(input.accumulatedWaitMs);
+	const absoluteCeilingMs = clampNonNegative(input.absoluteCeilingMs);
+
+	if (!input.enabled) {
+		return { shouldRetry: false, reason: "disabled" };
+	}
+	if (accountCount === 0) {
+		return { shouldRetry: false, reason: "no-accounts" };
+	}
+	if (waitMs === 0) { // NOTE(review): a caller that pre-clamps waitMs to the remaining ceiling budget ends here, not in "absolute-ceiling-exceeded", once the budget is spent
+		return { shouldRetry: false, reason: "no-wait" };
+	}
+	if (maxWaitMs > 0 && waitMs > maxWaitMs) { // maxWaitMs of 0 disables the per-wait cap
+		return { shouldRetry: false, reason: "wait-exceeds-max" };
+	}
+	if (currentRetryCount >= maxRetries) { // never true when maxRetries is Infinity
+		return { shouldRetry: false, reason: "retry-limit-reached" };
+	}
+	if (absoluteCeilingMs > 0 && accumulatedWaitMs + waitMs > absoluteCeilingMs) { // ceiling of 0 disables the check; landing exactly on the ceiling is still allowed
+		return { shouldRetry: false, reason: "absolute-ceiling-exceeded" };
+	}
+	return { shouldRetry: true, reason: "allowed" };
+}