From Jupyter Notebook to Production: A Practical MLOps Migration Guide
Your model works in a notebook. Now you need it in production. This is the most common transition in ML engineering, and it's where many projects stall. This guide provides a step-by-step path from notebook prototype to production-ready ML pipeline.
Step 1: Extract Functions from Cells
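The target of this step is a small, testable package rather than one long notebook. The repository layout assumed throughout this guide looks roughly like this (the project name is a placeholder; conftest.py and evaluate.py appear in later steps):
churn-predictor/
├── configs/
│   └── training.yaml
├── data/
│   └── training.csv
├── src/
│   ├── config.py
│   ├── data.py
│   ├── evaluate.py
│   ├── serve.py
│   └── train.py
├── tests/
│   ├── conftest.py
│   ├── test_data.py
│   └── test_model.py
├── Dockerfile
├── docker-compose.yaml
└── requirements.txt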
Before (Notebook):
# Cell 1
import pandas as pd
df = pd.read_csv("data.csv")
df = df.dropna()
df["age_bucket"] = pd.cut(df["age"], bins=[0, 25, 45, 65, 100])
# Cell 2
from sklearn.model_selection import train_test_split
X = df.drop("target", axis=1)
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Cell 3
from sklearn.ensemble import GradientBoostingClassifier
model = GradientBoostingClassifier(n_estimators=100, max_depth=5)
model.fit(X_train, y_train)
print(f"Accuracy: {model.score(X_test, y_test)}")After (Modular Python):
# src/data.py
import pandas as pd
def load_data(path: str) -> pd.DataFrame:
"""Load and clean training data."""
df = pd.read_csv(path)
df = df.dropna()
return df
def engineer_features(df: pd.DataFrame) -> pd.DataFrame:
"""Apply feature engineering."""
df = df.copy()
df["age_bucket"] = pd.cut(df["age"], bins=[0, 25, 45, 65, 100])
return df
def split_data(df: pd.DataFrame, target: str, test_size: float = 0.2, random_state: int = 42):
"""Split data into train and test sets."""
from sklearn.model_selection import train_test_split
X = df.drop(target, axis=1)
y = df[target]
return train_test_split(X, y, test_size=test_size, random_state=random_state)
# src/train.py
import mlflow
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score
from src.data import load_data, engineer_features, split_data
def train(data_path: str, params: dict) -> dict:
"""Train model and return metrics."""
# Load and prepare data
df = load_data(data_path)
df = engineer_features(df)
X_train, X_test, y_train, y_test = split_data(df, "target")
# Train
mlflow.set_experiment("churn-predictor")
with mlflow.start_run():
mlflow.log_params(params)
model = GradientBoostingClassifier(**params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
metrics = {
"accuracy": accuracy_score(y_test, y_pred),
"f1": f1_score(y_test, y_pred, average="weighted"),
}
mlflow.log_metrics(metrics)
mlflow.sklearn.log_model(model, "model", registered_model_name="churn-predictor")
return metrics
if __name__ == "__main__":
params = {"n_estimators": 100, "max_depth": 5, "random_state": 42}
metrics = train("data/training.csv", params)
print(f"Metrics: {metrics}")Step 2: Add Configuration
Move hardcoded values to config files:
# configs/training.yaml
data:
path: "data/training.csv"
target_column: "target"
test_size: 0.2
model:
type: "gradient_boosting"
params:
n_estimators: 100
max_depth: 5
learning_rate: 0.1
random_state: 42
experiment:
name: "churn-predictor"
tracking_uri: "http://mlflow:5000"
# src/config.py
import yaml
from dataclasses import dataclass
@dataclass
class TrainingConfig:
data_path: str
target_column: str
test_size: float
model_params: dict
experiment_name: str
@classmethod
def from_yaml(cls, path: str) -> "TrainingConfig":
with open(path) as f:
cfg = yaml.safe_load(f)
return cls(
data_path=cfg["data"]["path"],
target_column=cfg["data"]["target_column"],
test_size=cfg["data"]["test_size"],
model_params=cfg["model"]["params"],
experiment_name=cfg["experiment"]["name"],
)
Step 3: Add Tests
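The model tests below rely on two pytest fixtures, trained_model and test_data, that the test files don't define themselves. A minimal tests/conftest.py sketch that satisfies them, built on a small synthetic dataset (the column names, sample size, and churn rule are illustrative assumptions, not part of the original pipeline):
# tests/conftest.py
import numpy as np
import pandas as pd
import pytest
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

@pytest.fixture(scope="session")
def _split():
    # Synthetic churn-like data with an easily learnable signal (assumed columns).
    rng = np.random.default_rng(42)
    X = pd.DataFrame({
        "age": rng.integers(18, 90, size=500),
        "tenure_months": rng.integers(0, 120, size=500),
    })
    y = (X["tenure_months"] < 12).astype(int)  # simple proxy label
    return train_test_split(X, y, test_size=0.2, random_state=42)

@pytest.fixture(scope="session")
def trained_model(_split):
    X_train, _, y_train, _ = _split
    model = GradientBoostingClassifier(n_estimators=50, random_state=42)
    model.fit(X_train, y_train)
    return model

@pytest.fixture(scope="session")
def test_data(_split):
    _, X_test, _, y_test = _split
    return {"X": X_test, "y": y_test}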
# tests/test_data.py
import pytest
import pandas as pd
from src.data import load_data, engineer_features, split_data
def test_load_data_removes_nulls(tmp_path):
# Create test CSV with nulls
df = pd.DataFrame({"age": [25, None, 35], "target": [0, 1, 0]})
path = tmp_path / "test.csv"
df.to_csv(path, index=False)
result = load_data(str(path))
assert result.isnull().sum().sum() == 0
assert len(result) == 2 # Null row removed
def test_split_data_proportions():
df = pd.DataFrame({
"feature": range(100),
"target": [0, 1] * 50,
})
X_train, X_test, y_train, y_test = split_data(df, "target", test_size=0.2)
assert len(X_test) == 20
assert len(X_train) == 80
def test_split_data_no_leakage():
df = pd.DataFrame({
"id": range(100),
"feature": range(100),
"target": [0, 1] * 50,
})
X_train, X_test, _, _ = split_data(df, "target")
train_ids = set(X_train["id"])
test_ids = set(X_test["id"])
assert train_ids.isdisjoint(test_ids)
# tests/test_model.py
import pytest
import numpy as np
def test_model_accuracy_above_threshold(trained_model, test_data):
from sklearn.metrics import accuracy_score
y_pred = trained_model.predict(test_data["X"])
accuracy = accuracy_score(test_data["y"], y_pred)
assert accuracy >= 0.80
def test_model_prediction_shape(trained_model, test_data):
predictions = trained_model.predict(test_data["X"])
assert predictions.shape == test_data["y"].shape
def test_model_handles_single_sample(trained_model):
single = np.random.randn(1, trained_model.n_features_in_)
result = trained_model.predict(single)
assert result.shape == (1,)
Step 4: Add a Serving Layer
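The FastAPI app below loads the registered model from the MLflow Model Registry and exposes /predict and /health. Once the service is running (locally, or via the Docker setup in Step 5), a quick smoke test looks roughly like this (feature names and port are illustrative assumptions):
# smoke_test.py -- assumes the service is listening on localhost:8080
import requests

resp = requests.post(
    "http://localhost:8080/predict",
    json={"features": {"age": 42, "tenure_months": 7}},  # placeholder feature names
    timeout=5,
)
resp.raise_for_status()
print(resp.json())  # e.g. {"churn_probability": ..., "prediction": ...}
print(requests.get("http://localhost:8080/health", timeout=5).json())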
# src/serve.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import mlflow.pyfunc
app = FastAPI(title="Churn Prediction API")
model = None
@app.on_event("startup")
def load_model():
global model
model = mlflow.pyfunc.load_model("models:/churn-predictor/Production")
class PredictionRequest(BaseModel):
features: dict[str, float]
class PredictionResponse(BaseModel):
churn_probability: float
prediction: int
@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
import pandas as pd
df = pd.DataFrame([request.features])
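# NOTE: with the sklearn flavor, pyfunc predict() returns class labels by default,
# so churn_probability here is effectively 0 or 1; expose predict_proba through the
# logged model (or load the native sklearn model) if you need real probabilities.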
prediction = model.predict(df)
return PredictionResponse(
churn_probability=float(prediction[0]),
prediction=int(prediction[0] > 0.5),
)
@app.get("/health")
async def health():
return {"status": "healthy", "model_loaded": model is not None}Step 5: Containerize
# Dockerfile
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY src/ src/
COPY configs/ configs/
# For serving
EXPOSE 8080
CMD ["uvicorn", "src.serve:app", "--host", "0.0.0.0", "--port", "8080"]# docker-compose.yaml
services:
mlflow:
image: ghcr.io/mlflow/mlflow:latest
ports: ["5000:5000"]
environment:
- BACKEND_STORE_URI=sqlite:///mlflow.db
training:
build: .
command: python -m src.train
volumes: ["./data:/app/data", "./configs:/app/configs"]
depends_on: [mlflow]
serving:
build: .
ports: ["8080:8080"]
depends_on: [mlflow]
Step 6: Add CI/CD
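The workflow below runs the test suite on every push, then retrains and deploys on main. It calls python -m src.evaluate as a quality gate, a module this guide hasn't shown; a minimal sketch of such a gate (the accuracy metric and 0.80 threshold are assumptions to adapt to your own baseline):
# src/evaluate.py -- assumed CI quality gate: fail the job if the latest run underperforms
import sys

import mlflow

ACCURACY_THRESHOLD = 0.80  # assumption; set from your agreed baseline

def main() -> int:
    # Assumes MLFLOW_TRACKING_URI points at the same tracking server used in training.
    runs = mlflow.search_runs(
        experiment_names=["churn-predictor"],
        order_by=["attributes.start_time DESC"],
        max_results=1,
    )
    if runs.empty:
        print("No runs found for experiment 'churn-predictor'")
        return 1
    accuracy = runs.iloc[0]["metrics.accuracy"]
    print(f"Latest run accuracy: {accuracy:.3f} (threshold {ACCURACY_THRESHOLD})")
    return 0 if accuracy >= ACCURACY_THRESHOLD else 1

if __name__ == "__main__":
    sys.exit(main())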
# .github/workflows/ml-pipeline.yml
name: ML Pipeline
on:
push:
paths: ["src/**", "configs/**", "tests/**"]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with: { python-version: "3.11" }
- run: pip install -r requirements.txt -r requirements-dev.txt
- run: pytest tests/ -v
train:
needs: test
if: github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with: { python-version: "3.11" }
- run: pip install -r requirements.txt
- run: python -m src.train
- run: python -m src.evaluate
deploy:
needs: train
if: github.ref == 'refs/heads/main'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: docker build -t registry.company.com/churn-api:latest .
- run: docker push registry.company.com/churn-api:latest
The Complete Migration Checklist
| Step | Notebook | Production | Done |
|------|----------|------------|------|
| 1 | Inline code in cells | Modular functions in .py files | |
| 2 | Hardcoded values | YAML configuration | |
| 3 | Print statements | MLflow experiment tracking | |
| 4 | Manual data loading | Data versioning (DVC) | |
| 5 | No tests | pytest suite (data + model) | |
| 6 | No API | FastAPI serving endpoint | |
| 7 | Local execution | Containerized (Docker) | |
| 8 | Manual deployment | CI/CD pipeline | |
| 9 | No monitoring | Drift detection + alerting | |
Related Resources
- What is MLOps? — The full MLOps framework
- MLflow tutorial — Add experiment tracking
- ML CI/CD — Automated deployment
- MLOps for small teams — Lean approach
- Common MLOps mistakes — What to avoid
Need help migrating from notebooks to production? DeviDevs builds production ML pipelines from prototype to deployment. Get a free assessment →