fix: resilient usage display under API rate limiting (#193)

* fix: resilient usage display under API rate limiting

The Anthropic usage API rate-limits to ~1 call per 5 minutes. With the
previous 60s cache TTL, 4 out of 5 API calls returned 429, causing the
HUD to permanently display "(429)" instead of actual usage data.

Three-layer fix:
- Increase cache TTL from 60s to 5 minutes to match rate limit window
- Preserve lastGoodData in cache across rate-limited periods so the HUD
  always shows the best available data instead of errors
- Exponential backoff (60s→120s→240s→5min cap) with Retry-After header
  support for consecutive 429 responses

Also show "syncing..." instead of raw HTTP status on first-run rate limit.

* Update usage-api.ts

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix: harden 429 cache fallback behavior

* test: stabilize usage cache suite after rebase

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Jarrod Watts <jarrod@cubelabs.xyz>
This commit is contained in:
Paul
2026-03-13 17:11:36 -07:00
committed by GitHub
parent 98cde23e48
commit e8d64924bc
5 changed files with 249 additions and 20 deletions

View File

@@ -78,9 +78,8 @@ function formatUsagePercent(percent: number | null): string {
function formatUsageError(error?: string): string {
if (!error) return '';
if (error.startsWith('http-')) {
return ` (${error.slice(5)})`;
}
if (error === 'rate-limited') return ' (syncing...)';
if (error.startsWith('http-')) return ` (${error.slice(5)})`;
return ` (${error})`;
}

View File

@@ -251,9 +251,8 @@ function formatUsagePercent(percent: number | null): string {
function formatUsageError(error?: string): string {
if (!error) return '';
if (error.startsWith('http-')) {
return ` (${error.slice(5)})`;
}
if (error === 'rate-limited') return ' (syncing...)';
if (error.startsWith('http-')) return ` (${error.slice(5)})`;
return ` (${error})`;
}

View File

@@ -40,11 +40,15 @@ interface UsageApiResponse {
interface UsageApiResult {
data: UsageApiResponse | null;
error?: string;
/** Retry-After header value in seconds (from 429 responses) */
retryAfterSec?: number;
}
// File-based cache (HUD runs as new process each render, so in-memory cache won't persist)
const CACHE_TTL_MS = 60_000; // 60 seconds
const CACHE_TTL_MS = 5 * 60_000; // 5 minutes — matches Anthropic usage API rate limit window
const CACHE_FAILURE_TTL_MS = 15_000; // 15 seconds for failed requests
const CACHE_RATE_LIMITED_BASE_MS = 60_000; // 60s base for 429 backoff
const CACHE_RATE_LIMITED_MAX_MS = 5 * 60_000; // 5 min max backoff
const CACHE_LOCK_STALE_MS = 30_000;
const CACHE_LOCK_WAIT_MS = 2_000;
const CACHE_LOCK_POLL_MS = 50;
@@ -75,6 +79,12 @@ function isUsingCustomApiEndpoint(env: NodeJS.ProcessEnv = process.env): boolean
interface CacheFile {
data: UsageData;
timestamp: number;
/** Consecutive 429 count for exponential backoff */
rateLimitedCount?: number;
/** Absolute timestamp (ms) when retry is allowed (from Retry-After header) */
retryAfterUntil?: number;
/** Last successful API data — preserved across rate-limited periods */
lastGoodData?: UsageData;
}
interface CacheState {
@@ -107,6 +117,27 @@ function hydrateCacheData(data: UsageData): UsageData {
type CacheTtls = { cacheTtlMs: number; failureCacheTtlMs: number };
/**
 * Backoff TTL (ms) after the Nth consecutive 429 response.
 * count=1 → 60s, count=2 → 120s, count=3 → 240s, then capped at
 * CACHE_RATE_LIMITED_MAX_MS (5 min). Math.max(0, count - 1) clamps
 * count <= 1 to the 60s base so a zero/negative count cannot shrink the TTL.
 */
function getRateLimitedTtlMs(count: number): number {
// Exponential backoff: 60s, 120s, 240s, capped at 5 min
return Math.min(CACHE_RATE_LIMITED_BASE_MS * Math.pow(2, Math.max(0, count - 1)), CACHE_RATE_LIMITED_MAX_MS);
}
/**
 * Absolute timestamp (ms) until which a rate-limited cache entry should be
 * served without re-fetching, or null when the cached failure is not a 429.
 * Prefers the server-supplied Retry-After deadline when it lies after the
 * cache write; otherwise falls back to the local exponential backoff derived
 * from the consecutive-429 counter.
 */
function getRateLimitedRetryUntil(cache: CacheFile): number | null {
// Only 429 failures get backoff treatment; other errors use normal TTLs.
if (cache.data.apiError !== 'rate-limited') {
return null;
}
// Server-provided deadline wins, but only if it is still in the future
// relative to when this cache entry was written.
if (cache.retryAfterUntil && cache.retryAfterUntil > cache.timestamp) {
return cache.retryAfterUntil;
}
// Fall back to locally computed exponential backoff.
if (cache.rateLimitedCount && cache.rateLimitedCount > 0) {
return cache.timestamp + getRateLimitedTtlMs(cache.rateLimitedCount);
}
return null;
}
function readCacheState(homeDir: string, now: number, ttls: CacheTtls): CacheState | null {
try {
const cachePath = getCachePath(homeDir);
@@ -115,10 +146,20 @@ function readCacheState(homeDir: string, now: number, ttls: CacheTtls): CacheSta
const content = fs.readFileSync(cachePath, 'utf8');
const cache: CacheFile = JSON.parse(content);
// Check TTL - use shorter TTL for failure results
// Only serve lastGoodData during rate-limit backoff. Other failures should remain visible.
const displayData = (cache.data.apiError === 'rate-limited' && cache.lastGoodData)
? cache.lastGoodData
: cache.data;
const rateLimitedRetryUntil = getRateLimitedRetryUntil(cache);
if (rateLimitedRetryUntil && now < rateLimitedRetryUntil) {
return { data: hydrateCacheData(displayData), timestamp: cache.timestamp, isFresh: true };
}
const ttl = cache.data.apiUnavailable ? ttls.failureCacheTtlMs : ttls.cacheTtlMs;
return {
data: hydrateCacheData(cache.data),
data: hydrateCacheData(displayData),
timestamp: cache.timestamp,
isFresh: now - cache.timestamp < ttl,
};
@@ -127,12 +168,42 @@ function readCacheState(homeDir: string, now: number, ttls: CacheTtls): CacheSta
}
}
/**
 * Reads the consecutive-429 counter persisted in the cache file.
 * Returns 0 when the cache is missing, unreadable, or has no counter,
 * so the backoff sequence restarts from the 60s base.
 */
function readRateLimitedCount(homeDir: string): number {
try {
const cachePath = getCachePath(homeDir);
if (!fs.existsSync(cachePath)) return 0;
const content = fs.readFileSync(cachePath, 'utf8');
const cache: CacheFile = JSON.parse(content);
return cache.rateLimitedCount ?? 0;
} catch {
// Corrupt/unparseable cache is treated the same as no cache.
return 0;
}
}
/**
 * Reads the last successful usage payload preserved in the cache file
 * (stored so the HUD can keep rendering real values across 429 windows).
 * Returns the hydrated data, or null when the cache is missing, unreadable,
 * or never recorded a successful fetch.
 */
function readLastGoodData(homeDir: string): UsageData | null {
try {
const cachePath = getCachePath(homeDir);
if (!fs.existsSync(cachePath)) return null;
const content = fs.readFileSync(cachePath, 'utf8');
const cache: CacheFile = JSON.parse(content);
return cache.lastGoodData ? hydrateCacheData(cache.lastGoodData) : null;
} catch {
// Corrupt/unparseable cache is treated the same as no cache.
return null;
}
}
/**
 * Convenience wrapper over readCacheState: returns cached usage data only
 * while the entry is still considered fresh for its TTL class, else null
 * (signalling the caller to refetch).
 */
function readCache(homeDir: string, now: number, ttls: CacheTtls): UsageData | null {
const cache = readCacheState(homeDir, now, ttls);
return cache?.isFresh ? cache.data : null;
}
function writeCache(homeDir: string, data: UsageData, timestamp: number): void {
/** Optional backoff/resilience metadata persisted alongside the cached payload. */
interface WriteCacheOpts {
/** Consecutive 429 count; only persisted when > 0. */
rateLimitedCount?: number;
/** Absolute ms timestamp from a Retry-After header; gates re-fetching. */
retryAfterUntil?: number;
/** Last successful API payload, kept so 429 periods can render real data. */
lastGoodData?: UsageData;
}
function writeCache(homeDir: string, data: UsageData, timestamp: number, opts?: WriteCacheOpts): void {
try {
const cachePath = getCachePath(homeDir);
const cacheDir = path.dirname(cachePath);
@@ -142,6 +213,15 @@ function writeCache(homeDir: string, data: UsageData, timestamp: number): void {
}
const cache: CacheFile = { data, timestamp };
if (opts?.rateLimitedCount && opts.rateLimitedCount > 0) {
cache.rateLimitedCount = opts.rateLimitedCount;
}
if (opts?.retryAfterUntil) {
cache.retryAfterUntil = opts.retryAfterUntil;
}
if (opts?.lastGoodData) {
cache.lastGoodData = opts.lastGoodData;
}
fs.writeFileSync(cachePath, JSON.stringify(cache), 'utf8');
} catch {
// Ignore cache write failures
@@ -323,7 +403,17 @@ export async function getUsage(overrides: Partial<UsageApiDeps> = {}): Promise<U
// Fetch usage from API
const apiResult = await deps.fetchApi(accessToken);
if (!apiResult.data) {
// API call failed, cache the failure to prevent retry storms
const isRateLimited = apiResult.error === 'rate-limited';
const prevCount = readRateLimitedCount(homeDir);
const rateLimitedCount = isRateLimited ? prevCount + 1 : 0;
const retryAfterUntil = isRateLimited && apiResult.retryAfterSec
? now + apiResult.retryAfterSec * 1000
: undefined;
const backoffOpts: WriteCacheOpts = {
rateLimitedCount: isRateLimited ? rateLimitedCount : undefined,
retryAfterUntil,
};
const failureResult: UsageData = {
planName,
fiveHour: null,
@@ -333,7 +423,22 @@ export async function getUsage(overrides: Partial<UsageApiDeps> = {}): Promise<U
apiUnavailable: true,
apiError: apiResult.error,
};
writeCache(homeDir, failureResult, now);
if (isRateLimited) {
const staleCache = readCacheState(homeDir, now, deps.ttls);
const lastGood = readLastGoodData(homeDir);
const goodData = (staleCache && !staleCache.data.apiUnavailable)
? staleCache.data
: lastGood;
if (goodData) {
// Preserve the backoff state in cache, but keep rendering the last successful values.
writeCache(homeDir, failureResult, now, { ...backoffOpts, lastGoodData: goodData });
return goodData;
}
}
writeCache(homeDir, failureResult, now, backoffOpts);
return failureResult;
}
@@ -353,8 +458,8 @@ export async function getUsage(overrides: Partial<UsageApiDeps> = {}): Promise<U
sevenDayResetAt,
};
// Write to file cache
writeCache(homeDir, result, now);
// Write to file cache — also store as lastGoodData for rate-limit resilience
writeCache(homeDir, result, now, { lastGoodData: result });
return result;
} catch (error) {
@@ -855,7 +960,23 @@ function fetchUsageApi(accessToken: string): Promise<UsageApiResult> {
res.on('end', () => {
if (res.statusCode !== 200) {
debug('API returned non-200 status:', res.statusCode);
resolve({ data: null, error: res.statusCode ? `http-${res.statusCode}` : 'http-error' });
// Use a distinct error key for 429 so cache/render can handle it specially
const error = res.statusCode === 429
? 'rate-limited'
: res.statusCode ? `http-${res.statusCode}` : 'http-error';
// Parse Retry-After header (seconds) from 429 responses
let retryAfterSec: number | undefined;
if (res.statusCode === 429) {
const raw = res.headers['retry-after'];
if (raw) {
const parsed = parseInt(String(raw), 10);
if (Number.isFinite(parsed) && parsed > 0) {
retryAfterSec = parsed;
debug('Retry-After:', retryAfterSec, 'seconds');
}
}
}
resolve({ data: null, error, retryAfterSec });
return;
}

View File

@@ -717,6 +717,23 @@ test('renderSessionLine displays warning when API is unavailable', () => {
assert.ok(!line.includes('5h:'), 'should not show 5h when API unavailable');
});
// Verifies the HUD renders the friendly "(syncing...)" hint for the
// 'rate-limited' error key instead of exposing the raw internal key.
test('renderSessionLine shows syncing hint when usage API is rate-limited', () => {
const ctx = baseContext();
ctx.usageData = {
planName: 'Max',
fiveHour: null,
sevenDay: null,
fiveHourResetAt: null,
sevenDayResetAt: null,
apiUnavailable: true,
apiError: 'rate-limited',
};
const line = renderSessionLine(ctx);
assert.ok(line.includes('usage:'), 'should show usage label');
assert.ok(line.includes('syncing...'), 'should show syncing hint for rate limiting');
assert.ok(!line.includes('rate-limited'), 'should not expose raw rate-limit error key');
});
test('renderSessionLine hides usage when showUsage config is false (hybrid toggle)', () => {
const ctx = baseContext();
ctx.usageData = {

View File

@@ -818,7 +818,7 @@ describe('getKeychainServiceNames', () => {
});
});
describe('getUsage caching behavior', () => {
describe('getUsage caching behavior', { concurrency: false }, () => {
beforeEach(async () => {
cacheTempHome = await createTempHome();
clearCache(cacheTempHome);
@@ -831,7 +831,7 @@ describe('getUsage caching behavior', () => {
}
});
test('cache expires after 60 seconds for success', async () => {
test('cache expires after 5 minutes for success', async () => {
await writeCredentials(cacheTempHome, buildCredentials());
let fetchCalls = 0;
let nowValue = 1000;
@@ -843,11 +843,13 @@ describe('getUsage caching behavior', () => {
await getUsage({ homeDir: () => cacheTempHome, fetchApi, now: () => nowValue, readKeychain: () => null });
assert.equal(fetchCalls, 1);
nowValue += 30_000;
// Still fresh at 2 minutes
nowValue += 120_000;
await getUsage({ homeDir: () => cacheTempHome, fetchApi, now: () => nowValue, readKeychain: () => null });
assert.equal(fetchCalls, 1);
nowValue += 31_000;
// Expired after 5 minutes
nowValue += 181_000;
await getUsage({ homeDir: () => cacheTempHome, fetchApi, now: () => nowValue, readKeychain: () => null });
assert.equal(fetchCalls, 2);
});
@@ -911,6 +913,97 @@ describe('getUsage caching behavior', () => {
assert.equal(fetchCalls, 2);
});
// First fetch succeeds (fiveHour=25); subsequent fetches return 429 with
// Retry-After: 120. Asserts the last good value keeps being served and that
// no additional fetch happens while the backoff window is still open.
test('serves last good data during rate-limit backoff only', async () => {
await writeCredentials(cacheTempHome, buildCredentials());
let nowValue = 1000;
let fetchCalls = 0;
const fetchApi = async () => {
fetchCalls += 1;
if (fetchCalls === 1) {
return buildApiResult({
data: buildApiResponse({
five_hour: {
utilization: 25,
resets_at: '2026-01-06T15:00:00Z',
},
}),
});
}
// Every call after the first is rate-limited with a 120s Retry-After.
return { data: null, error: 'rate-limited', retryAfterSec: 120 };
};
const initial = await getUsage({
homeDir: () => cacheTempHome,
fetchApi,
now: () => nowValue,
readKeychain: () => null,
});
assert.equal(initial?.fiveHour, 25);
// Advance past the 5-minute success TTL so a refetch is attempted (and 429s).
nowValue += 301_000;
const rateLimited = await getUsage({
homeDir: () => cacheTempHome,
fetchApi,
now: () => nowValue,
readKeychain: () => null,
});
assert.equal(rateLimited?.fiveHour, 25);
assert.equal(fetchCalls, 2);
// Within the backoff window: served from cache, no extra fetch.
nowValue += 60_000;
const cachedDuringBackoff = await getUsage({
homeDir: () => cacheTempHome,
fetchApi,
now: () => nowValue,
readKeychain: () => null,
});
assert.equal(cachedDuringBackoff?.fiveHour, 25);
assert.equal(fetchCalls, 2);
});
// A non-429 failure ('network') after the cache expires must surface as an
// error result — stale good data is only substituted for rate-limit failures.
test('does not mask non-rate-limited failures with stale good data', async () => {
await writeCredentials(cacheTempHome, buildCredentials());
let nowValue = 1000;
let fetchCalls = 0;
const fetchApi = async () => {
fetchCalls += 1;
if (fetchCalls === 1) {
return buildApiResult({
data: buildApiResponse({
five_hour: {
utilization: 25,
resets_at: '2026-01-06T15:00:00Z',
},
}),
});
}
// Subsequent calls fail with a generic (non-429) error.
return { data: null, error: 'network' };
};
const initial = await getUsage({
homeDir: () => cacheTempHome,
fetchApi,
now: () => nowValue,
readKeychain: () => null,
});
assert.equal(initial?.fiveHour, 25);
// Advance past the 5-minute success TTL to force a refetch.
nowValue += 301_000;
const failure = await getUsage({
homeDir: () => cacheTempHome,
fetchApi,
now: () => nowValue,
readKeychain: () => null,
});
assert.equal(fetchCalls, 2);
assert.equal(failure?.apiUnavailable, true);
assert.equal(failure?.apiError, 'network');
assert.equal(failure?.fiveHour, null);
});
test('deduplicates concurrent refreshes when cache is missing', async () => {
await writeCredentials(cacheTempHome, buildCredentials());
@@ -962,7 +1055,7 @@ describe('getUsage caching behavior', () => {
readKeychain: () => null,
});
nowValue += 61_000;
nowValue += 301_000;
let fetchCalls = 0;
let releaseFetch = () => {};