Rate limiting is essential for API stability but frustrating when you hit limits. This guide covers how to handle rate limits gracefully and optimize API usage.
Understanding Rate Limit Errors
A typical rate-limit response looks like this:
HTTP/1.1 429 Too Many Requests
Retry-After: 60
{
"error": "rate_limit_exceeded",
"message": "Rate limit exceeded. Try again in 60 seconds.",
"retry_after": 60
}
Rate limit headers (names vary by API):
X-RateLimit-Limit: 1000
X-RateLimit-Remaining: 0
X-RateLimit-Reset: 1705680000
Retry-After: 60
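A quick way to act on these headers is to compute a wait time directly from them. The sketch below assumes the `X-RateLimit-*` names shown above (they vary by provider) and treats `X-RateLimit-Reset` as a Unix epoch in seconds, as in the example; note that `Retry-After` may be either a number of seconds or an HTTP-date:

function getWaitTimeMs(response) {
  // Prefer Retry-After: it may be seconds or an HTTP-date
  const retryAfter = response.headers.get('Retry-After');
  if (retryAfter) {
    const seconds = parseInt(retryAfter, 10);
    if (!Number.isNaN(seconds)) return seconds * 1000;
    const date = Date.parse(retryAfter);
    if (!Number.isNaN(date)) return Math.max(0, date - Date.now());
  }
  // Fall back to the X-RateLimit-* convention (names vary by API)
  const remaining = response.headers.get('X-RateLimit-Remaining');
  const reset = response.headers.get('X-RateLimit-Reset'); // epoch seconds here; some APIs send a delta
  if (remaining === '0' && reset) {
    return Math.max(0, parseInt(reset, 10) * 1000 - Date.now());
  }
  return 0; // No throttling signal
}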
Solution 1: Exponential Backoff
JavaScript/Node.js implementation:
async function fetchWithRetry(url, options = {}, maxRetries = 5) {
let lastError;
for (let attempt = 0; attempt < maxRetries; attempt++) {
try {
const response = await fetch(url, options);
// Check rate limit headers
const remaining = response.headers.get('X-RateLimit-Remaining');
if (remaining && parseInt(remaining) < 10) {
console.warn(`Rate limit warning: ${remaining} requests remaining`);
}
if (response.status === 429) {
        // Retry-After may be seconds or an HTTP-date; fall back to backoff if unparsable
        const retryAfter = parseInt(response.headers.get('Retry-After'), 10);
        const waitTime = Number.isNaN(retryAfter)
          ? Math.min(1000 * Math.pow(2, attempt), 60000) // Max 60s
          : retryAfter * 1000;
console.log(`Rate limited. Waiting ${waitTime}ms before retry ${attempt + 1}`);
await sleep(waitTime);
continue;
}
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
return response;
} catch (error) {
lastError = error;
if (attempt < maxRetries - 1) {
const waitTime = Math.min(1000 * Math.pow(2, attempt), 30000);
console.log(`Request failed. Retrying in ${waitTime}ms...`);
await sleep(waitTime);
}
}
}
  // lastError is undefined if every attempt was a 429, so provide a fallback
  throw lastError ?? new Error(`Rate limit retries exhausted after ${maxRetries} attempts`);
}
function sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}

Python implementation:
import time
import requests
from functools import wraps
def retry_with_backoff(max_retries=5, base_delay=1, max_delay=60):
def decorator(func):
@wraps(func)
def wrapper(*args, **kwargs):
for attempt in range(max_retries):
try:
response = func(*args, **kwargs)
if response.status_code == 429:
                        retry_after = response.headers.get('Retry-After')
                        if retry_after and retry_after.isdigit():
                            wait_time = int(retry_after)
                        else:
                            # Retry-After missing or an HTTP-date: fall back to backoff
                            wait_time = min(base_delay * (2 ** attempt), max_delay)
print(f"Rate limited. Waiting {wait_time}s...")
time.sleep(wait_time)
continue
response.raise_for_status()
return response
except requests.exceptions.RequestException as e:
if attempt == max_retries - 1:
raise
wait_time = min(base_delay * (2 ** attempt), max_delay)
print(f"Request failed: {e}. Retrying in {wait_time}s...")
time.sleep(wait_time)
raise Exception(f"Max retries ({max_retries}) exceeded")
return wrapper
return decorator
@retry_with_backoff(max_retries=5)
def call_api(url, headers=None):
    return requests.get(url, headers=headers)

Solution 2: Request Queuing with Rate Limiter
Token bucket implementation:
class RateLimiter {
constructor(tokensPerSecond, maxTokens) {
this.tokensPerSecond = tokensPerSecond;
this.maxTokens = maxTokens;
this.tokens = maxTokens;
this.lastRefill = Date.now();
}
refillTokens() {
const now = Date.now();
const elapsed = (now - this.lastRefill) / 1000;
this.tokens = Math.min(this.maxTokens, this.tokens + elapsed * this.tokensPerSecond);
this.lastRefill = now;
}
async acquire() {
return new Promise((resolve) => {
const tryAcquire = () => {
this.refillTokens();
if (this.tokens >= 1) {
this.tokens -= 1;
resolve();
} else {
// Wait until a token is available
const waitTime = (1 - this.tokens) / this.tokensPerSecond * 1000;
setTimeout(tryAcquire, waitTime);
}
};
tryAcquire();
});
}
async execute(fn) {
await this.acquire();
return fn();
}
}
// Usage: 10 requests per second, burst of 20
const limiter = new RateLimiter(10, 20);
async function makeRateLimitedRequest(url) {
return limiter.execute(() => fetch(url));
}
// Process multiple requests respecting rate limits
async function processItems(items) {
const results = [];
for (const item of items) {
const result = await makeRateLimitedRequest(`/api/items/${item.id}`);
results.push(result);
}
return results;
}

Python with ratelimit library:
from ratelimit import limits, sleep_and_retry, RateLimitException
import requests

# 100 calls per minute (client-side throttle)
@sleep_and_retry
@limits(calls=100, period=60)
def call_api(url):
    response = requests.get(url)
    if response.status_code == 429:
        # Raise RateLimitException so sleep_and_retry sleeps, then retries;
        # a plain Exception would propagate instead of triggering a retry
        raise RateLimitException("Server rate limited", 60)
    return response
# With custom backoff
import backoff
@backoff.on_exception(
backoff.expo,
requests.exceptions.RequestException,
max_tries=5,
    # Retry 429s and 5xx; give up immediately on other 4xx client errors
    giveup=lambda e: e.response is not None and e.response.status_code != 429 and e.response.status_code < 500
)
def robust_api_call(url):
response = requests.get(url)
response.raise_for_status()
    return response

Solution 3: Batch Requests
Instead of individual calls, use batch endpoints:
// ❌ Individual requests (one rate-limit hit per user)
for (const userId of userIds) {
  const user = await (await fetch(`/api/users/${userId}`)).json();
  users.push(user);
}

// ✅ Batch request (a single rate-limit hit)
const response = await fetch('/api/users/batch', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ ids: userIds })
});
const users = await response.json();

GraphQL batching:
// Instead of multiple queries
const queries = userIds.map(id =>
  fetch('/graphql', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ query: `{ user(id: "${id}") { name email } }` })
  })
);
// Use batched query
const batchedQuery = `{
${userIds.map((id, i) => `user${i}: user(id: "${id}") { name email }`).join('\n')}
}`;
const result = await fetch('/graphql', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ query: batchedQuery })
});

Solution 4: Caching to Reduce Calls
import NodeCache from 'node-cache';
const cache = new NodeCache({ stdTTL: 300 }); // 5 minute TTL
async function getCachedData(key, fetchFn) {
const cached = cache.get(key);
if (cached) {
return cached;
}
const data = await fetchFn();
cache.set(key, data);
return data;
}
// Usage
async function getUser(userId) {
return getCachedData(`user:${userId}`, async () => {
const response = await fetch(`/api/users/${userId}`);
return response.json();
});
}

Redis caching for distributed systems:
import redis
import json
redis_client = redis.Redis(host='localhost', port=6379, db=0)
def get_with_cache(key, fetch_fn, ttl=300):
cached = redis_client.get(key)
if cached:
return json.loads(cached)
data = fetch_fn()
redis_client.setex(key, ttl, json.dumps(data))
    return data

Solution 5: Monitor and Alert
class RateLimitMonitor {
constructor() {
this.metrics = {
totalRequests: 0,
rateLimited: 0,
remaining: {},
};
}
recordRequest(apiName, response) {
this.metrics.totalRequests++;
if (response.status === 429) {
this.metrics.rateLimited++;
console.error(`RATE LIMITED: ${apiName}`);
// Send alert to monitoring system
this.sendAlert(apiName);
}
const remaining = response.headers.get('X-RateLimit-Remaining');
const limit = response.headers.get('X-RateLimit-Limit');
if (remaining && limit) {
this.metrics.remaining[apiName] = {
remaining: parseInt(remaining),
limit: parseInt(limit),
percentage: (parseInt(remaining) / parseInt(limit)) * 100
};
// Warn at 20% remaining
if (parseInt(remaining) / parseInt(limit) < 0.2) {
console.warn(`Low rate limit: ${apiName} has ${remaining}/${limit} remaining`);
}
}
}
sendAlert(apiName) {
// Integrate with your alerting system
// e.g., PagerDuty, Slack, email
}
getMetrics() {
return this.metrics;
}
}
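A quick usage sketch: wrap your HTTP calls so every response passes through the monitor (`monitoredFetch` and the `apiName` label are illustrative names, not part of any library):

const monitor = new RateLimitMonitor();

async function monitoredFetch(apiName, url, options = {}) {
  const response = await fetch(url, options);
  monitor.recordRequest(apiName, response);
  return response;
}

// Periodically inspect usage, e.g. for a dashboard or health check
setInterval(() => console.log(monitor.getMetrics()), 60000);

Common API Rate Limits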
| API | Rate Limit | Notes |
|-----|------------|-------|
| OpenAI | Varies by tier | Token + request limits |
| GitHub | 5000/hour (authenticated) | 60/hour unauthenticated |
| Stripe | 100/sec (live), 25/sec (test) | Per API key |
| Twitter/X | 50-500/15min | Depends on endpoint |
| Google APIs | Varies | Per project quotas |
| Salesforce | 100,000/24hr | Depends on edition |
| Slack | 1/second | Tier 1-4 methods vary |
Error-Specific Handling
async function handleApiResponse(response) {
switch (response.status) {
    case 429: {
      const retryAfter = response.headers.get('Retry-After');
      return {
        error: 'rate_limited',
        retryAfter: retryAfter ? parseInt(retryAfter, 10) : 60,
        strategy: 'exponential_backoff'
      };
    }
case 503:
// Service unavailable - different from rate limit
return {
error: 'service_unavailable',
retryAfter: 30,
strategy: 'linear_backoff'
};
case 401:
// Auth error - don't retry
return {
error: 'unauthorized',
retryAfter: null,
strategy: 'fail_fast'
};
default:
if (response.ok) {
return { success: true, data: await response.json() };
}
return {
error: 'unknown',
status: response.status,
strategy: 'exponential_backoff'
};
}
}
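A hypothetical caller can then branch on the returned strategy (`sleep` is the helper defined earlier):

const result = await handleApiResponse(await fetch('/api/data'));
if (result.error === 'rate_limited') {
  await sleep(result.retryAfter * 1000); // wait, then retry the request
} else if (result.strategy === 'fail_fast') {
  throw new Error(result.error); // e.g. unauthorized: retrying won't help
}

Quick Reference: Retry Strategies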
| Scenario | Strategy | Example Delays |
|----------|----------|----------------|
| 429 with Retry-After | Wait exact time | As specified |
| 429 without header | Exponential backoff | 1s, 2s, 4s, 8s, 16s |
| 500 Server Error | Exponential + jitter | 1-2s, 2-4s, 4-8s |
| Network timeout | Linear backoff | 5s, 10s, 15s |
| Connection refused | Fast fail after 3 tries | 1s, 1s, 1s |
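The "exponential + jitter" strategy deserves a concrete sketch: adding randomness keeps many clients from retrying in lockstep after a shared outage or limit. This is the "full jitter" variant, where each delay is a random value up to the exponential cap:

// Full jitter: random delay between 0 and the exponential cap
function backoffWithJitter(attempt, baseMs = 1000, maxMs = 30000) {
  const cap = Math.min(baseMs * Math.pow(2, attempt), maxMs);
  return Math.random() * cap;
}

// Usage inside a retry loop:
// await sleep(backoffWithJitter(attempt));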
Best Practices Checklist
- Always check the `Retry-After` header first
- Implement exponential backoff with jitter
- Set maximum retry attempts (typically 3-5)
- Cache responses when possible
- Use batch endpoints where available
- Monitor rate limit usage proactively
- Distribute requests across time windows
- Consider multiple API keys for high-volume needs
High-Volume API Integration?
Managing API rate limits at scale requires careful architecture. Our team specializes in:
- API gateway implementation
- Request queue management
- Multi-provider failover strategies
- Cost optimization for API usage