From Jupyter Notebook to Production: A Practical MLOps Migration Guide
Your model works in a notebook. Now you need it in production. This is the most common transition in ML engineering, and it's where many projects stall. This guide provides a step-by-step path from notebook prototype to production-ready ML pipeline.
Step 1: Extract Functions from Cells
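The target of this step is a small, testable package rather than one long notebook. The repository layout assumed throughout this guide looks roughly like this (the project name is a placeholder; conftest.py and evaluate.py appear in later steps):
churn-predictor/
├── configs/
│   └── training.yaml
├── data/
│   └── training.csv
├── src/
│   ├── config.py
│   ├── data.py
│   ├── evaluate.py
│   ├── serve.py
│   └── train.py
├── tests/
│   ├── conftest.py
│   ├── test_data.py
│   └── test_model.py
├── Dockerfile
├── docker-compose.yaml
└── requirements.txt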
Before (Notebook):
# Cell 1
import pandas as pd
df = pd.read_csv("data.csv")
df = df.dropna()
df["age_bucket"] = pd.cut(df["age"], bins=[0, 25, 45, 65, 100])
# Cell 2
from sklearn.model_selection import train_test_split
X = df.drop("target", axis=1)
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Cell 3
from sklearn.ensemble import GradientBoostingClassifier
model = GradientBoostingClassifier(n_estimators=100, max_depth=5)
model.fit(X_train, y_train)
print(f"Accuracy: {model.score(X_test, y_test)}")After (Modular Python):
# src/data.py
import pandas as pd
def load_data(path: str) -> pd.DataFrame:
"""Load and clean training data."""
df = pd.read_csv(path)
df = df.dropna()
return df
def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
"""Apply feature engineering."""
df = df.copy()
df["age_bucket"] = pd.cut(df["age"], bins=[0, 25, 45, 65, 100])
return df
def split_data(df: pd.DataFrame, target: str, test_size: float = 0.2, random_state: int = 42):
"""Split data into train and test sets."""
from sklearn.model_selection import train_test_split
X = df.drop(target, axis=1)
y = df[target]
return train_test_split(X, y, test_size=test_size, random_state=random_state)
# src/train.py
import mlflow
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score
from src.data import load_data, engineer_features, split_data
def train(data_path: str, params: dict) -> dict:
"""Train model and return metrics."""
# Load and prepare data
df = load_data(data_path)
df = engineer_features(df)
X_train, X_test, y_train, y_test = split_data(df, "target")
# Train
mlflow.set_experiment("churn-predictor")
with mlflow.start_run():
mlflow.log_params(params)
model = GradientBoostingClassifier(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
metrics = {
"accuracy": accuracy_score(y_test, y_pred),
"f1": f1_score(y_test, y_pred, average="weighted"),
}
mlflow.log_metrics(metrics)
mlflow.sklearn.log_model(model, "model", registered_model_name="churn-predictor")
return metrics
if __name__ == "__main__":
params = {"n_estimators": 100, "max_depth": 5, "random_state": 42}
metrics = train("data/training.csv", params)
print(f"Metrics: {metrics}")Step 2: Add Configuration
Move hardcoded values to config files:
# configs/training.yaml
data:
path: "data/training.csv"
target_column: "target"
test_size: 0.2
model:
type: "gradient_boosting"
params:
n_estimators: 100
max_depth: 5
learning_rate: 0.1
random_state: 42
experiment:
name: "churn-predictor"
tracking_uri: "http://mlflow:5000"
# src/config.py
import yaml
from dataclasses import dataclass
@dataclass
class TrainingConfig:
data_path: str
target_column: str
test_size: float
model_params: dict
experiment_name: str
@classmethod
def from_yaml(cls, path: str) -> "TrainingConfig":
with open(path) as f:
cfg = yaml.safe_load(f)
return cls(
data_path=cfg["data"]["path"],
target_column=cfg["data"]["target_column"],
test_size=cfg["data"]["test_size"],
model_params=cfg["model"]["params"],
experiment_name=cfg["experiment"]["name"],
)
Step 3: Add Tests
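The model tests below rely on two pytest fixtures, trained_model and test_data, that the test files don't define themselves. A minimal tests/conftest.py sketch that satisfies them, built on a small synthetic dataset (the column names, sample size, and churn rule are illustrative assumptions, not part of the original pipeline):
# tests/conftest.py
import numpy as np
import pandas as pd
import pytest
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

@pytest.fixture(scope="session")
def _split():
    # Synthetic churn-like data with an easily learnable signal (assumed columns).
    rng = np.random.default_rng(42)
    X = pd.DataFrame({
        "age": rng.integers(18, 90, size=500),
        "tenure_months": rng.integers(0, 120, size=500),
    })
    y = (X["tenure_months"] < 12).astype(int)  # simple proxy label
    return train_test_split(X, y, test_size=0.2, random_state=42)

@pytest.fixture(scope="session")
def trained_model(_split):
    X_train, _, y_train, _ = _split
    model = GradientBoostingClassifier(n_estimators=50, random_state=42)
    model.fit(X_train, y_train)
    return model

@pytest.fixture(scope="session")
def test_data(_split):
    _, X_test, _, y_test = _split
    return {"X": X_test, "y": y_test}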
# tests/test_data.py
import pytest
import pandas as pd
from src.data import load_data, engineer_features, split_data
def test_load_data_removes_nulls(tmp_path):
# Create test CSV with nulls
df = pd.DataFrame({"age": [25, None, 35], "target": [0, 1, 0]})
path = tmp_path / "test.csv"
df.to_csv(path, index=False)
result = load_data(str(path))
assert result.isnull().sum().sum() == 0
assert len(result) == 2 # Null row removed
def test_split_data_proportions():
df = pd.DataFrame({
"feature": range(100),
"target": [0, 1] * 50,
})
X_train, X_test, y_train, y_test = split_data(df, "target", test_size=0.2)
assert len(X_test) == 20
assert len(X_train) == 80
def test_split_data_no_leakage():
df = pd.DataFrame({
"id": range(100),
"feature": range(100),
"target": [0, 1] * 50,
})
X_train, X_test, _, _ = split_data(df, "target")
train_ids = set(X_train["id"])
test_ids = set(X_test["id"])
assert train_ids.isdisjoint(test_ids)
# tests/test_model.py
import pytest
import numpy as np
def test_model_accuracy_above_threshold(trained_model, test_data):
from sklearn.metrics import accuracy_score
y_pred = trained_model.predict(test_data["X"])
accuracy = accuracy_score(test_data["y"], y_pred)
assert accuracy >= 0.80
def test_model_prediction_shape(trained_model, test_data):
predictions = trained_model.predict(test_data["X"])
assert predictions.shape == test_data["y"].shape
def test_model_handles_single_sample(trained_model):
single = np.random.randn(1, trained_model.n_features_in_)
result = trained_model.predict(single)
assert result.shape == (1,)
Step 4: Add a Serving Layer
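The FastAPI app below loads the registered model from the MLflow Model Registry and exposes /predict and /health. Once the service is running (locally, or via the Docker setup in Step 5), a quick smoke test looks roughly like this (feature names and port are illustrative assumptions):
# smoke_test.py -- assumes the service is listening on localhost:8080
import requests

resp = requests.post(
    "http://localhost:8080/predict",
    json={"features": {"age": 42, "tenure_months": 7}},  # placeholder feature names
    timeout=5,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"churn_probability": ..., "prediction": ...}
print(requests.get("http://localhost:8080/health", timeout=5).json())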
# src/serve.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import mlflow.pyfunc
app = FastAPI(title="Churn Prediction API")
model = None
@app.on_event("startup")
def load_model():
global model
model = mlflow.pyfunc.load_model("models:/churn-predictor/Production")
class PredictionRequest(BaseModel):
features: dict[str, float]
class PredictionResponse(BaseModel):
churn_probability: float
prediction: int
@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
import pandas as pd
df = pd.DataFrame([request.features])
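# NOTE: with the sklearn flavor, pyfunc predict() returns class labels by default,
# so churn_probability here is effectively 0 or 1; expose predict_proba through the
# logged model (or load the native sklearn model) if you need real probabilities.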
prediction = model.predict(df)
return PredictionResponse(
churn_probability=float(prediction[0]),
prediction=int(prediction[0] > 0.5),
)
@app.get("/health")
async def health():
return {"status": "healthy", "model_loaded": model is not None}Step 5: Containerize
# Dockerfile
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY src/ src/
COPY configs/ configs/
# For serving
EXPOSE 8080
CMD ["uvicorn", "src.serve:app", "--host", "0.0.0.0", "--port", "8080"]# docker-compose.yaml
services:
mlflow:
image: ghcr.io/mlflow/mlflow:latest
ports: ["5000:5000"]
environment:
- BACKEND_STORE_URI=sqlite:///mlflow.db
training:
build: .
command: python -m src.train
volumes: ["./data:/app/data", "./configs:/app/configs"]
depends_on: [mlflow]
serving:
build: .
ports: ["8080:8080"]
depends_on: [mlflow]
Step 6: Add CI/CD
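The workflow below runs the test suite on every push, then retrains and deploys on main. It calls python -m src.evaluate as a quality gate, a module this guide hasn't shown; a minimal sketch of such a gate (the accuracy metric and 0.80 threshold are assumptions to adapt to your own baseline):
# src/evaluate.py -- assumed CI quality gate: fail the job if the latest run underperforms
import sys

import mlflow

ACCURACY_THRESHOLD = 0.80  # assumption; set from your agreed baseline

def main() -> int:
    # Assumes MLFLOW_TRACKING_URI points at the same tracking server used in training.
    runs = mlflow.search_runs(
        experiment_names=["churn-predictor"],
        order_by=["attributes.start_time DESC"],
        max_results=1,
    )
    if runs.empty:
        print("No runs found for experiment 'churn-predictor'")
        return 1
    accuracy = runs.iloc[0]["metrics.accuracy"]
    print(f"Latest run accuracy: {accuracy:.3f} (threshold {ACCURACY_THRESHOLD})")
    return 0 if accuracy >= ACCURACY_THRESHOLD else 1

if __name__ == "__main__":
    sys.exit(main())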
# .github/workflows/ml-pipeline.yml
name: ML Pipeline
on:
push:
paths: ["src/**", "configs/**", "tests/**"]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with: { python-version: "3.11" }
- run: pip install -r requirements.txt -r requirements-dev.txt
- run: pytest tests/ -v
train:
needs: test
if: github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with: { python-version: "3.11" }
- run: pip install -r requirements.txt
- run: python -m src.train
- run: python -m src.evaluate
deploy:
needs: train
if: github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: docker build -t registry.company.com/churn-api:latest .
- run: docker push registry.company.com/churn-api:latest
The Complete Migration Checklist
| Step | Notebook | Production | Done |
|------|----------|------------|------|
| 1 | Inline code in cells | Modular functions in .py files | |
| 2 | Hardcoded values | YAML configuration | |
| 3 | Print statements | MLflow experiment tracking | |
| 4 | Manual data loading | Data versioning (DVC) | |
| 5 | No tests | pytest suite (data + model) | |
| 6 | No API | FastAPI serving endpoint | |
| 7 | Local execution | Containerized (Docker) | |
| 8 | Manual deployment | CI/CD pipeline | |
| 9 | No monitoring | Drift detection + alerting | |
Related Resources
- What is MLOps? — The full MLOps framework
- MLflow tutorial — Add experiment tracking
- ML CI/CD — Automated deployment
- MLOps for small teams — Lean approach
- Common MLOps mistakes — What to avoid
Need help migrating from notebooks to production? DeviDevs builds production ML pipelines from prototype to deployment. Get a free assessment →