OpenAI API errors can stop your AI application dead in its tracks. This guide covers the most common errors and the fix for each one.
## Error: 401 Invalid API Key

Symptom:

```json
{
  "error": {
    "message": "Incorrect API key provided: sk-xxxx",
    "type": "invalid_request_error",
    "code": "invalid_api_key"
  }
}
```

Solution 1 - Verify API key:

```python
import openai
import os
# Check if key is set
api_key = os.getenv("OPENAI_API_KEY")
print(f"Key starts with: {api_key[:10]}..." if api_key else "No key found!")
# Correct initialization
client = openai.OpenAI(api_key=api_key)
# Test the key
try:
    models = client.models.list()
    print("API key is valid!")
except openai.AuthenticationError as e:
    print(f"Invalid API key: {e}")
```

Solution 2 - Check key format:

```python
# Valid key formats:
# sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx (standard)
# sk-proj-xxxxxxxx (project key)
# Common mistakes:
# - Extra spaces: "sk-xxx " (trailing space)
# - Missing sk- prefix
# - Using organization ID instead of API key
```
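A quick format check can catch these mistakes before you make a request. This is a heuristic sketch only: the exact character set and length of OpenAI keys are not formally documented, so treat a failed check as a prompt to re-copy the key, not proof it is invalid.

```python
import re

def looks_like_openai_key(key: str) -> bool:
    """Heuristic format check; the pattern is an assumption, not an official spec."""
    if key != key.strip():
        return False  # leading/trailing whitespace is a common copy-paste error
    # Standard keys and project keys both start with "sk-"
    return bool(re.match(r"^sk-[A-Za-z0-9_\-]{20,}$", key))

print(looks_like_openai_key("sk-xxx "))  # False: trailing space
```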
Solution 3 - Regenerate key:

- Go to https://platform.openai.com/api-keys
- Create new secret key
- Copy immediately (shown only once)
- Update environment variable (see the sketch below)
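After regenerating, make sure your application actually picks up the new key. A minimal sketch, assuming the key lives in a local `.env` file and `python-dotenv` is installed:

```python
import os

import openai
from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # reads OPENAI_API_KEY from .env into the process environment
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
```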
## Error: 429 Rate Limit Exceeded

Symptom:

```json
{
  "error": {
    "message": "Rate limit reached for gpt-4 in organization org-xxx",
    "type": "tokens",
    "code": "rate_limit_exceeded"
  }
}
```

Solution 1 - Implement exponential backoff:

```python
import openai
from tenacity import retry, wait_exponential, stop_after_attempt

@retry(
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5)
)
def call_openai_with_retry(messages):
    client = openai.OpenAI()
    return client.chat.completions.create(
        model="gpt-4",
        messages=messages
    )

# Usage
try:
    response = call_openai_with_retry([
        {"role": "user", "content": "Hello!"}
    ])
except openai.RateLimitError as e:
    print(f"Rate limit hit after retries: {e}")
```

Solution 2 - Check rate limit headers:

```python
import openai
client = openai.OpenAI()
response = client.chat.completions.with_raw_response.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hi"}]
)

# Check headers
print(f"Requests remaining: {response.headers.get('x-ratelimit-remaining-requests')}")
print(f"Tokens remaining: {response.headers.get('x-ratelimit-remaining-tokens')}")
print(f"Reset time: {response.headers.get('x-ratelimit-reset-requests')}")
```

Solution 3 - Request rate limit increase:
- Go to https://platform.openai.com/account/limits
- Request increase based on usage tier
- Add payment method to increase from free tier
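While an increase is pending, you can also throttle requests client-side to stay under your current tier's requests-per-minute cap. A minimal sketch; the RPM value here is a placeholder for your tier's actual limit:

```python
import time

class RequestThrottle:
    """Space requests at least 60/RPM seconds apart."""

    def __init__(self, requests_per_minute: int):
        self.interval = 60.0 / requests_per_minute
        self._last = 0.0

    def wait(self):
        elapsed = time.monotonic() - self._last
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
        self._last = time.monotonic()

throttle = RequestThrottle(requests_per_minute=20)  # placeholder RPM
# Call throttle.wait() before each client.chat.completions.create(...)
```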
## Error: Context Length Exceeded

Symptom:

```json
{
  "error": {
    "message": "This model's maximum context length is 8192 tokens. However, your messages resulted in 12000 tokens.",
    "type": "invalid_request_error",
    "code": "context_length_exceeded"
  }
}
```

Solution 1 - Count and truncate tokens:

```python
import tiktoken

def count_tokens(text, model="gpt-4"):
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))

def truncate_to_limit(messages, max_tokens=7000, model="gpt-4"):
    encoding = tiktoken.encoding_for_model(model)
    total_tokens = 0
    truncated = []
    for msg in messages:
        msg_tokens = len(encoding.encode(msg["content"]))
        if total_tokens + msg_tokens > max_tokens:
            # Truncate this message to the remaining budget;
            # copy the dict so the caller's messages are not mutated
            available = max_tokens - total_tokens
            tokens = encoding.encode(msg["content"])[:available]
            truncated.append({**msg, "content": encoding.decode(tokens)})
            break
        total_tokens += msg_tokens
        truncated.append(msg)
    return truncated
```

Solution 2 - Use model with larger context:

```python
# Model context limits:
# gpt-3.5-turbo: 16,385 tokens
# gpt-4: 8,192 tokens
# gpt-4-turbo: 128,000 tokens
# gpt-4o: 128,000 tokens

client = openai.OpenAI()
response = client.chat.completions.create(
    model="gpt-4-turbo",  # Larger context
    messages=long_conversation
)
```

Solution 3 - Implement conversation summarization:

```python
def summarize_conversation(messages, client):
    """Summarize old messages to save context space."""
    if len(messages) < 10:
        return messages
    # Keep the system message (if any) and the last 5 messages
    system_msg = messages[0] if messages[0]["role"] == "system" else None
    recent = messages[-5:]
    old = messages[1:-5] if system_msg else messages[:-5]
    # Summarize the older messages with a cheaper model
    transcript = "\n".join(f"{m['role']}: {m['content']}" for m in old)
    summary = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": f"Summarize this conversation briefly:\n{transcript}"}]
    ).choices[0].message.content
    # Return the compressed conversation
    result = []
    if system_msg:
        result.append(system_msg)
    result.append({"role": "system", "content": f"Previous conversation summary: {summary}"})
    result.extend(recent)
    return result
```

## Error: Timeout / Connection Error
Symptom:

```
openai.APITimeoutError: Request timed out
openai.APIConnectionError: Connection error
```

Solution 1 - Increase timeout:

```python
import openai
import httpx

client = openai.OpenAI(
    timeout=httpx.Timeout(60.0, connect=10.0)  # 60s total, 10s connect
)

# Or per-request
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}],
    timeout=120.0
)
```

Solution 2 - Use streaming for long responses:

```python
client = openai.OpenAI()
stream = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Write a long story"}],
    stream=True
)

full_response = ""
for chunk in stream:
    if chunk.choices[0].delta.content:
        content = chunk.choices[0].delta.content
        full_response += content
        print(content, end="", flush=True)
```

Solution 3 - Implement retry with timeout handling:

```python
import openai
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential

@retry(
    retry=retry_if_exception_type((
        openai.APITimeoutError,
        openai.APIConnectionError
    )),
    wait=wait_exponential(multiplier=1, min=2, max=30),
    stop=stop_after_attempt(3)
)
def robust_completion(messages):
    client = openai.OpenAI(timeout=60.0)
    return client.chat.completions.create(
        model="gpt-4",
        messages=messages
    )
```

## Error: Model Not Found
Symptom:

```json
{
  "error": {
    "message": "The model 'gpt-5' does not exist",
    "type": "invalid_request_error",
    "code": "model_not_found"
  }
}
```

Solution - Use valid model names:

```python
# Current valid models (as of 2026):
VALID_MODELS = {
    "chat": [
        "gpt-4o",         # Latest, recommended
        "gpt-4o-mini",    # Faster, cheaper
        "gpt-4-turbo",    # Large context
        "gpt-4",          # Original GPT-4
        "gpt-3.5-turbo",  # Fast and cheap
    ],
    "embeddings": [
        "text-embedding-3-large",
        "text-embedding-3-small",
        "text-embedding-ada-002",
    ],
    "images": [
        "dall-e-3",
        "dall-e-2",
    ],
}

# List available models
client = openai.OpenAI()
models = client.models.list()
for model in models:
    print(model.id)
```
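To catch typos at startup rather than at request time, you can check a configured model name against the models your key can actually see. A small helper sketch built on the `models.list()` call above:

```python
import openai

def validate_model(client, model_name: str) -> bool:
    """Return True if this API key has access to `model_name`."""
    available = {m.id for m in client.models.list()}
    return model_name in available

client = openai.OpenAI()
if not validate_model(client, "gpt-4o"):
    raise ValueError("Model 'gpt-4o' is not available to this API key")
```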
## Error: Content Policy Violation

Symptom:

```json
{
  "error": {
    "message": "Your request was rejected as a result of our safety system",
    "type": "invalid_request_error",
    "code": "content_policy_violation"
  }
}
```

Solution 1 - Review content guidelines:
```python
# OpenAI prohibits:
# - Illegal content
# - Harassment/violence
# - Adult content
# - Misinformation
# - Malware/spam generation

# If a legitimate use case is blocked, try:
# 1. Rephrase the request
# 2. Add context explaining the legitimate purpose
# 3. Use the moderation endpoint first
```

Solution 2 - Use moderation API:

```python
client = openai.OpenAI()

def check_content(text):
    response = client.moderations.create(input=text)
    result = response.results[0]
    if result.flagged:
        print("Content flagged for:")
        # `categories` is a model object, so dump it to a dict to iterate
        for category, flagged in result.categories.model_dump().items():
            if flagged:
                print(f"  - {category}")
        return False
    return True

# Check before sending
if check_content(user_input):
    # Safe to send to GPT
    response = client.chat.completions.create(...)
```

## Error: Insufficient Quota
Symptom:

```json
{
  "error": {
    "message": "You exceeded your current quota, please check your plan and billing details",
    "type": "insufficient_quota",
    "code": "insufficient_quota"
  }
}
```

Solution:
- Check usage: https://platform.openai.com/usage
- Add payment method: https://platform.openai.com/account/billing
- Set usage limits to prevent overage
- Monitor costs with alerts
```python
# Track token usage
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello"}]
)
usage = response.usage
print(f"Prompt tokens: {usage.prompt_tokens}")
print(f"Completion tokens: {usage.completion_tokens}")
print(f"Total tokens: {usage.total_tokens}")

# Estimate cost (prices vary by model; these are gpt-4 rates)
cost = (usage.prompt_tokens * 0.00003) + (usage.completion_tokens * 0.00006)
print(f"Estimated cost: ${cost:.4f}")
```

## Quick Reference: Error Codes
| Code | Meaning | Solution |
|------|---------|----------|
| 401 | Invalid API key | Regenerate key |
| 429 | Rate limit | Exponential backoff |
| 400 | Bad request | Check parameters |
| 404 | Model not found | Use valid model name |
| 500 | Server error | Retry with backoff |
| 503 | Overloaded | Wait and retry |
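The same mapping works in code: the `openai` library raises a distinct exception class per status code, so handlers can branch on type. A sketch; the retry/fail split mirrors the table above and is a policy choice, not a library rule:

```python
import openai

# Transient failures: retry with backoff (429, timeouts, 5xx)
RETRYABLE = (
    openai.RateLimitError,
    openai.APITimeoutError,
    openai.APIConnectionError,
    openai.InternalServerError,
)
# Permanent failures: fix the request or credentials instead (401, 400, 404)
FATAL = (
    openai.AuthenticationError,
    openai.BadRequestError,
    openai.NotFoundError,
)

def classify(error: Exception) -> str:
    if isinstance(error, RETRYABLE):
        return "retry"
    if isinstance(error, FATAL):
        return "fix-and-fail"  # retrying will not help
    return "unknown"
```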
## Production Checklist

```python
import openai
from tenacity import retry, wait_exponential, stop_after_attempt

class RobustOpenAIClient:
    def __init__(self):
        self.client = openai.OpenAI(
            timeout=60.0,
            max_retries=3
        )

    @retry(
        wait=wait_exponential(multiplier=1, min=2, max=60),
        stop=stop_after_attempt(5)
    )
    def chat(self, messages, model="gpt-4o"):
        try:
            return self.client.chat.completions.create(
                model=model,
                messages=messages
            )
        except openai.RateLimitError:
            raise  # Let tenacity handle retry
        except openai.APIError as e:
            print(f"API error: {e}")
            raise
```
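Usage is then a single call; timeouts and retries happen inside the wrapper:

```python
client = RobustOpenAIClient()
response = client.chat([{"role": "user", "content": "Hello!"}])
print(response.choices[0].message.content)
```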
## Building AI Applications?

Production AI applications require robust error handling and security. Our team specializes in:
- AI application architecture
- LLM security audits
- Cost optimization strategies
- EU AI Act compliance