LLM hallucinations - when AI confidently generates false information - are a critical challenge. This guide covers detection and prevention strategies.
Understanding Hallucinations
Types of LLM Hallucinations:
1. Factual Errors
"The Eiffel Tower was built in 1920" ❌ (Actually 1889)
2. Fabricated Sources
"According to a 2023 Nature study..." ❌ (Study doesn't exist)
3. Logical Inconsistencies
"X is true" → later → "X is false" ❌
4. Entity Confusion
"Einstein invented the telephone" ❌ (Bell did)
Strategy 1: Retrieval-Augmented Generation (RAG)
Ground responses in actual documents:
```python
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# Create vector store from your documents
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# RAG prompt that prevents hallucination
prompt = ChatPromptTemplate.from_template("""
Answer the question based ONLY on the following context.
If the context doesn't contain the answer, say "I don't have information about that."
Do NOT make up information.

Context:
{context}

Question: {question}

Answer:""")

# Chain with retrieval
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | ChatOpenAI(temperature=0)
)
response = chain.invoke("What is our refund policy?")
```

Then verify that the response is actually grounded in the retrieved documents:
```python
def verify_rag_response(response, retrieved_docs):
    """Check if response content actually comes from documents."""
    response_lower = response.lower()
    doc_content = " ".join([doc.page_content.lower() for doc in retrieved_docs])

    # Extract potential facts from response
    sentences = response.split('. ')
    verified = []
    unverified = []

    for sentence in sentences:
        # Check if key phrases appear in source docs
        words = sentence.lower().split()
        key_phrases = [' '.join(words[i:i + 3]) for i in range(len(words) - 2)]
        if any(phrase in doc_content for phrase in key_phrases):
            verified.append(sentence)
        else:
            unverified.append(sentence)

    return {
        "verified": verified,
        "unverified": unverified,
        "confidence": len(verified) / (len(verified) + len(unverified))
    }
```
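For example, you can re-run retrieval for the same question and check the chain's answer against the returned documents (a minimal sketch; the 0.8 threshold is just an illustrative cut-off):

```python
# Fetch the documents the answer should be grounded in
docs = retriever.invoke("What is our refund policy?")

# `response` is the AIMessage produced by the chain above; `.content` is its text
check = verify_rag_response(response.content, docs)

if check["confidence"] < 0.8:  # illustrative threshold, tune for your data
    print("Possibly ungrounded sentences:", check["unverified"])
```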
Strategy 2: Self-Consistency Checking

Ask the same question several times and compare the answers:
```python
import openai
from collections import Counter

def check_consistency(question, model="gpt-4o", samples=5):
    """Generate multiple responses and check for consistency."""
    client = openai.OpenAI()
    responses = []

    for _ in range(samples):
        response = client.chat.completions.create(
            model=model,
            temperature=0.7,  # Some variation
            messages=[
                {"role": "system", "content": "Answer concisely and factually."},
                {"role": "user", "content": question}
            ]
        )
        responses.append(response.choices[0].message.content)

    # Normalize lightly before comparing.
    # For factual questions, answers should be consistent.
    normalized = [r.strip().lower().rstrip('.') for r in responses]
    counts = Counter(normalized)

    return {
        "responses": responses,
        "most_common": counts.most_common(1)[0][0],
        "unique_answers": len(counts),
        "consistent": len(counts) <= 2  # Allow minor variation
    }
# Usage
result = check_consistency("What year was Python released?")
if not result["consistent"]:
    print("Warning: Inconsistent answers detected - possible hallucination")
```

Strategy 3: Confidence Scoring
Have the model rate its own confidence:
```python
import re

def get_answer_with_confidence(question):
    client = openai.OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": """
Answer the question, then rate your confidence on a scale of 1-10.

Format:
Answer: [your answer]
Confidence: [1-10]
Reasoning: [why this confidence level]

If you're not sure, say so. It's better to admit uncertainty than to guess.
"""},
            {"role": "user", "content": question}
        ]
    )
    text = response.choices[0].message.content

    # Parse confidence
    confidence_match = re.search(r'Confidence:\s*(\d+)', text)
    confidence = int(confidence_match.group(1)) if confidence_match else 5

    return {
        "full_response": text,
        "confidence": confidence,
        "needs_verification": confidence < 7
    }
# Usage
result = get_answer_with_confidence("Who invented the transistor?")
if result["needs_verification"]:
    print("Low confidence - verify this information!")
```

Strategy 4: Fact-Checking Pipeline
Cross-reference with external sources:
```python
import json
import openai
import requests

def fact_check_claim(claim):
    """Use external APIs to look for evidence supporting a claim."""
    # Option 1: Wikipedia search
    wiki_url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "list": "search",
        "srsearch": claim,
        "format": "json"
    }
    response = requests.get(wiki_url, params=params)
    results = response.json().get("query", {}).get("search", [])

    # Option 2: Use a search API
    # Option 3: Check against a knowledge base

    return {
        "claim": claim,
        "evidence_found": len(results) > 0,
        "sources": [r["title"] for r in results[:3]]
    }

def validate_response(llm_response):
    """Extract and verify factual claims."""
    client = openai.OpenAI()

    # Extract claims
    extraction = client.chat.completions.create(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": """
Extract factual claims from this text that can be verified.
Return JSON of the form {"claims": ["claim 1", "claim 2", ...]}.
Only include objective, verifiable facts, not opinions.
"""},
            {"role": "user", "content": llm_response}
        ]
    )
    claims = json.loads(extraction.choices[0].message.content).get("claims", [])

    # Verify each claim
    results = []
    for claim in claims:
        verification = fact_check_claim(claim)
        results.append(verification)
    return results
```
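A quick way to exercise this pipeline (a sketch; the input text is just an example). Note that `evidence_found` only means the Wikipedia search returned related articles, not that the claim is confirmed; a human or a second model still has to judge whether those sources actually support each claim.

```python
report = validate_response("Python was first released in 1991 by Guido van Rossum.")
for item in report:
    status = "related sources found" if item["evidence_found"] else "no sources found"
    print(f'{item["claim"]}: {status} {item["sources"]}')
```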
Strategy 5: Structured Output Validation

Enforce schema and validate:
```python
from pydantic import BaseModel, validator
from typing import List, Optional
import openai
import json

class FactualResponse(BaseModel):
    answer: str
    sources: List[str]
    confidence_level: str  # "high", "medium", "low"
    caveats: Optional[List[str]] = None

    @validator('confidence_level')
    def validate_confidence(cls, v, values):
        if v not in ("high", "medium", "low"):
            raise ValueError("Invalid confidence level")
        # confidence_level is declared after sources, so sources is already in `values`
        if v == "high" and not values.get('sources'):
            raise ValueError("High confidence claims need sources")
        return v

def get_validated_answer(question):
    client = openai.OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": f"""
Answer questions with verified information.
Return JSON matching this schema:
{FactualResponse.schema_json()}

Rules:
- Only claim "high" confidence for well-known facts
- Include sources when possible
- List caveats for uncertain information
"""},
            {"role": "user", "content": question}
        ]
    )
    data = json.loads(response.choices[0].message.content)

    # Validate with Pydantic
    try:
        validated = FactualResponse(**data)
        return validated
    except Exception as e:
        print(f"Validation failed: {e}")
        return None
```
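Calling it looks like this (a sketch; the fallback behaviour is up to your application):

```python
validated = get_validated_answer("What is the boiling point of water at sea level?")

if validated is not None:
    print(f"{validated.answer} (confidence: {validated.confidence_level})")
else:
    # A schema violation is itself a useful hallucination signal,
    # e.g. a "high" confidence answer that arrived without sources.
    print("Response failed validation; retry or fall back to a safe answer.")
```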
Strategy 6: Knowledge Cutoff Awareness

Handle questions about recent events:
```python
import json
import openai

def handle_temporal_query(question):
    client = openai.OpenAI()

    # Check if the question is about recent events
    temporal_check = client.chat.completions.create(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": """
Analyze if this question requires knowledge of events after April 2024.
Return JSON: {"requires_recent": true/false, "reason": "..."}
"""},
            {"role": "user", "content": question}
        ]
    )
    result = json.loads(temporal_check.choices[0].message.content)

    if result["requires_recent"]:
        return {
            "warning": "This question may require information after my knowledge cutoff (April 2024)",
            "recommendation": "Please verify with current sources",
            "answer": None
        }

    # Proceed with answering
    answer = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": question}
        ]
    )
return {"answer": answer.choices[0].message.content}Strategy 7: Human-in-the-Loop
Strategy 7: Human-in-the-Loop

Flag uncertain responses for review:
```python
class HumanReviewQueue:
    def __init__(self):
        self.pending_reviews = []

    def process_with_review(self, question, answer, confidence):
        if confidence < 0.7:
            self.pending_reviews.append({
                "question": question,
                "proposed_answer": answer,
                "confidence": confidence,
                "status": "pending_review"
            })
            return {
                "answer": "This response is pending human review for accuracy.",
                "draft": answer,
                "review_id": len(self.pending_reviews) - 1
            }
        return {"answer": answer, "verified": True}

    def approve_review(self, review_id, corrected_answer=None):
        review = self.pending_reviews[review_id]
        review["status"] = "approved"
        review["final_answer"] = corrected_answer or review["proposed_answer"]
        return review["final_answer"]
# Usage in production
review_queue = HumanReviewQueue()

def answer_question(question):
    # get_llm_response and estimate_confidence are placeholders for your own
    # generation and confidence-estimation logic (e.g. Strategy 3 above)
    llm_answer = get_llm_response(question)
    confidence = estimate_confidence(llm_answer)
    return review_queue.process_with_review(question, llm_answer, confidence)
```
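On the reviewer side, a queued item can then be resolved, either approved as drafted or replaced with a corrected answer (a sketch; the index and correction text are illustrative):

```python
# Inspect the first flagged item and publish a corrected answer
pending = review_queue.pending_reviews[0]
print("Needs review:", pending["question"], "->", pending["proposed_answer"])

final = review_queue.approve_review(0, corrected_answer="Refunds are available within 30 days of purchase.")
print("Published answer:", final)
```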
Production Anti-Hallucination Checklist

```python
def production_safe_response(question, context=None):
    """Complete pipeline for hallucination-resistant responses."""
    # 1. If context is available, use RAG
    # (rag_chain stands in for a grounded chain like the one in Strategy 1,
    #  but one that accepts the context directly instead of retrieving it)
    if context:
        grounded_answer = rag_chain.invoke({
            "question": question,
            "context": context
        })
    else:
        grounded_answer = None

    # 2. Get LLM response with self-rated confidence (Strategy 3)
    llm_result = get_answer_with_confidence(question)

    # 3. Check consistency across samples (Strategy 2)
    consistency = check_consistency(question, samples=3)

    # 4. Determine final response
    if grounded_answer and llm_result["confidence"] >= 7:
        return {
            "answer": grounded_answer,
            "source": "grounded",
            "confidence": "high"
        }
    elif consistency["consistent"] and llm_result["confidence"] >= 7:
        return {
            "answer": llm_result["full_response"],
            "source": "llm",
            "confidence": "medium",
            "note": "Verified through consistency check"
        }
    else:
        return {
            "answer": "I'm not confident enough to answer this accurately.",
            "confidence": "low",
            "recommendation": "Please verify with authoritative sources"
        }
```
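Putting it together might look like this (a sketch; `support_docs_text` is a hypothetical string of pre-retrieved policy text, and `rag_chain` must exist as noted in the comments above):

```python
result = production_safe_response(
    "What is our refund policy?",
    context=support_docs_text,  # hypothetical pre-retrieved context
)

if result["confidence"] == "low":
    # Escalate rather than guess, e.g. via the HumanReviewQueue from Strategy 7
    print(result["answer"], result["recommendation"])
else:
    print(result["answer"])
```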
Quick Reference: Prevention Techniques

| Technique | Best For | Complexity |
|-----------|----------|------------|
| RAG | Domain-specific facts | Medium |
| Self-consistency | General questions | Low |
| Confidence scoring | All responses | Low |
| Fact-checking | Critical claims | High |
| Structured output | Data extraction | Medium |
| Human review | High-stakes decisions | High |
Building Reliable AI Systems?
Hallucination prevention is critical for enterprise AI. Our team offers:
- AI reliability audits
- Custom fact-checking pipelines
- RAG implementation consulting
- EU AI Act compliance