Modern applications typically comprise 70-90% open source code, making Software Composition Analysis (SCA) essential for security. This guide covers implementing comprehensive SCA in your DevSecOps pipeline.
Understanding SCA Components
Dependency Analysis Architecture
# sca_analyzer.py
import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Dict, List, Optional, Set
class SeverityLevel(Enum):
    """Normalized severity buckets used for vulnerability tallies."""

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"
class LicenseRisk(Enum):
    """Coarse license-obligation categories for policy decisions."""

    PERMISSIVE = "permissive"            # MIT, Apache, BSD
    WEAK_COPYLEFT = "weak_copyleft"      # LGPL, MPL
    STRONG_COPYLEFT = "strong_copyleft"  # GPL, AGPL
    COMMERCIAL = "commercial"
    UNKNOWN = "unknown"
@dataclass
class Vulnerability:
    """A single known vulnerability affecting a dependency version range."""

    cve_id: str
    severity: SeverityLevel
    cvss_score: float
    description: str
    affected_versions: str            # version-range expression, as reported
    fixed_version: Optional[str]      # None when no fixed release exists
    references: List[str] = field(default_factory=list)  # advisory URLs
    exploitability: str = "unknown"   # free-form exploit-maturity label
@dataclass
class License:
    """License identity plus the obligations it imposes."""

    name: str                      # human-readable name, e.g. "Apache 2.0"
    spdx_id: str                   # canonical SPDX identifier
    risk_level: LicenseRisk
    requires_attribution: bool     # must credit the original authors
    requires_disclosure: bool      # must disclose source (copyleft)
    commercial_use_allowed: bool
@dataclass
class Dependency:
    """A single resolved package in the dependency graph.

    ``raw_license`` carries the license identifier as reported by the
    package registry; it was previously an undeclared dynamic attribute
    probed with ``hasattr`` by ``SCAAnalyzer._identify_license``.
    Declaring it here (defaulting to None) keeps the dataclass honest
    while remaining backward compatible.
    """

    name: str
    version: str
    ecosystem: str                # e.g. "npm", "pypi", "maven"
    direct: bool                  # True for direct, False for transitive
    license: Optional[License]
    vulnerabilities: List[Vulnerability] = field(default_factory=list)
    dependencies: List[str] = field(default_factory=list)  # child package names
    raw_license: Optional[str] = None  # license id as reported by the registry
@dataclass
class SCAReport:
    """Aggregated output of one SCA scan run."""

    project_name: str
    scan_timestamp: str                           # ISO-8601 scan time
    total_dependencies: int
    direct_dependencies: int
    transitive_dependencies: int
    vulnerabilities_by_severity: Dict[str, int]   # severity value -> count
    license_breakdown: Dict[str, int]             # SPDX id -> count
    dependencies: List[Dependency]
    policy_violations: List[str]                  # human-readable messages
class SCAAnalyzer:
    """Core SCA analysis engine.

    Enriches each dependency with vulnerability and license data, evaluates
    the result against a policy mapping, and emits an :class:`SCAReport`.
    """

    # License classification table, keyed by SPDX id.
    LICENSE_MAP = {
        "MIT": License("MIT", "MIT", LicenseRisk.PERMISSIVE, True, False, True),
        "Apache-2.0": License("Apache 2.0", "Apache-2.0", LicenseRisk.PERMISSIVE, True, False, True),
        "BSD-3-Clause": License("BSD 3-Clause", "BSD-3-Clause", LicenseRisk.PERMISSIVE, True, False, True),
        "ISC": License("ISC", "ISC", LicenseRisk.PERMISSIVE, True, False, True),
        "LGPL-2.1": License("LGPL 2.1", "LGPL-2.1", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "LGPL-3.0": License("LGPL 3.0", "LGPL-3.0", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "MPL-2.0": License("MPL 2.0", "MPL-2.0", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "GPL-2.0": License("GPL 2.0", "GPL-2.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
        "GPL-3.0": License("GPL 3.0", "GPL-3.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
        "AGPL-3.0": License("AGPL 3.0", "AGPL-3.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
    }

    def __init__(self, vulnerability_db, policy_config, project_name: str = ""):
        """
        Args:
            vulnerability_db: object exposing ``lookup(ecosystem, name, version)``
                that returns a list of :class:`Vulnerability`.
            policy_config: mapping with optional keys ``allow_critical``,
                ``blocked_licenses`` and ``banned_packages``.
            project_name: stamped into generated reports. Defaults to "" for
                backward compatibility (previously hard-coded in the report).
        """
        self.vuln_db = vulnerability_db
        self.policy = policy_config
        self.project_name = project_name

    def analyze(self, dependencies: List[Dependency]) -> SCAReport:
        """Perform full SCA analysis (mutates the given dependencies)."""
        # Enrich with vulnerability and license data.
        for dep in dependencies:
            dep.vulnerabilities = self.vuln_db.lookup(
                dep.ecosystem, dep.name, dep.version
            )
            dep.license = self._identify_license(dep)
        violations = self._check_policy_violations(dependencies)
        return self._generate_report(dependencies, violations)

    def _identify_license(self, dep: Dependency) -> Optional[License]:
        """Classify the dependency's declared license, if recognized.

        A registry lookup would normally populate ``raw_license``; ids not
        present in LICENSE_MAP classify as None (unknown).
        """
        license_id = getattr(dep, "raw_license", None)
        return self.LICENSE_MAP.get(license_id)

    def _check_policy_violations(self, dependencies: List[Dependency]) -> List[str]:
        """Return human-readable policy-violation messages."""
        violations = []
        # Hoist policy lookups out of the per-dependency loop.
        allow_critical = self.policy.get('allow_critical', False)
        blocked = self.policy.get('blocked_licenses', [])
        banned = self.policy.get('banned_packages', [])
        for dep in dependencies:
            # Vulnerability severity threshold (only CRITICAL is gated here).
            if not allow_critical:
                for vuln in dep.vulnerabilities:
                    if vuln.severity == SeverityLevel.CRITICAL:
                        violations.append(
                            f"CRITICAL vulnerability {vuln.cve_id} in {dep.name}@{dep.version}"
                        )
            # License compliance.
            if dep.license and dep.license.spdx_id in blocked:
                violations.append(
                    f"Blocked license {dep.license.spdx_id} in {dep.name}"
                )
            # Deprecated/banned packages.
            if dep.name in banned:
                violations.append(f"Banned package: {dep.name}")
        return violations

    def _generate_report(
        self,
        dependencies: List[Dependency],
        violations: List[str],
    ) -> SCAReport:
        """Aggregate per-dependency data into an :class:`SCAReport`."""
        # Tally vulnerabilities per severity bucket.
        vuln_counts = {s.value: 0 for s in SeverityLevel}
        for dep in dependencies:
            for vuln in dep.vulnerabilities:
                vuln_counts[vuln.severity.value] += 1
        # Tally licenses per SPDX id.
        license_counts: Dict[str, int] = {}
        for dep in dependencies:
            if dep.license:
                spdx = dep.license.spdx_id
                license_counts[spdx] = license_counts.get(spdx, 0) + 1
        direct = sum(1 for d in dependencies if d.direct)
        return SCAReport(
            project_name=self.project_name,
            # datetime/timezone imported at module top; the original called
            # datetime.utcnow() without any datetime import (NameError).
            scan_timestamp=datetime.now(timezone.utc).isoformat(),
            total_dependencies=len(dependencies),
            direct_dependencies=direct,
            transitive_dependencies=len(dependencies) - direct,
            vulnerabilities_by_severity=vuln_counts,
            license_breakdown=license_counts,
            dependencies=dependencies,
            policy_violations=violations,
        )


# CI/CD Pipeline Integration
GitHub Actions SCA Workflow
# .github/workflows/sca-scan.yml
name: Software Composition Analysis

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 6 * * *'  # Daily at 6 AM (UTC)

jobs:
  dependency-scan:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      security-events: write
    steps:
      - uses: actions/checkout@v4

      # npm/Node.js scanning
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install Dependencies
        run: npm ci

      - name: npm audit
        run: |
          npm audit --json > npm-audit.json || true
          node scripts/parse-npm-audit.js npm-audit.json

      # Snyk scanning
      # NOTE(review): prefer pinning third-party actions to a release tag or
      # commit SHA instead of a moving @master/@main ref.
      - name: Run Snyk to check for vulnerabilities
        uses: snyk/actions/node@master
        continue-on-error: true
        env:
          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
        with:
          args: --severity-threshold=high --json-file-output=snyk-results.json

      # OWASP Dependency Check
      - name: OWASP Dependency Check
        uses: dependency-check/Dependency-Check_Action@main
        with:
          project: 'my-project'
          path: '.'
          format: 'JSON'
          out: 'dependency-check-report'
          args: >
            --suppression suppression.xml
            --failOnCVSS 7

      # Trivy for container dependencies
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: 'fs'
          scan-ref: '.'
          format: 'sarif'
          output: 'trivy-results.sarif'

      # Upload to GitHub Security.
      # codeql-action v2 is deprecated/retired; v3 is the supported line.
      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'

      # License scanning
      - name: License Compliance Check
        run: |
          npx license-checker --json --out licenses.json
          node scripts/check-license-compliance.js licenses.json

      # Generate SBOM
      - name: Generate SBOM
        run: |
          npx @cyclonedx/cyclonedx-npm --output-file sbom.json

      - name: Upload SBOM
        uses: actions/upload-artifact@v4
        with:
          name: sbom
          path: sbom.json

  python-scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install safety pip-audit

      - name: Safety Check
        run: |
          pip freeze > requirements-lock.txt
          safety check -r requirements-lock.txt --json > safety-report.json || true

      - name: pip-audit
        run: |
          pip-audit --format json --output pip-audit.json || true

      - name: Upload Python scan results
        uses: actions/upload-artifact@v4
        with:
          name: python-security-reports
          path: |
            safety-report.json
            pip-audit.json

# License Compliance Checker
// scripts/check-license-compliance.js
const fs = require('fs');

// Policy configuration
const policy = {
  allowed: [
    'MIT',
    'Apache-2.0',
    'BSD-2-Clause',
    'BSD-3-Clause',
    'ISC',
    'CC0-1.0',
    '0BSD',
    'Unlicense'
  ],
  conditional: [
    // Allowed but require review
    'LGPL-2.1',
    'LGPL-3.0',
    'MPL-2.0'
  ],
  blocked: [
    'GPL-2.0',
    'GPL-3.0',
    'AGPL-3.0',
    'SSPL-1.0',
    'BUSL-1.1'
  ],
  unknownAction: 'warn' // 'fail', 'warn', or 'allow'
};

/**
 * Classify every package in a license-checker JSON report against the
 * policy, print a summary, and exit non-zero on violations.
 *
 * Fixes over the original:
 *  - license-checker may report `licenses` as an array for multi-licensed
 *    packages; each entry is now classified individually.
 *  - `policy.unknownAction` was declared but never consulted; 'fail' now
 *    turns unknown licenses into a non-zero exit.
 */
function checkLicenses(licensesFile) {
  const licenses = JSON.parse(fs.readFileSync(licensesFile, 'utf8'));
  const results = {
    passed: [],
    warnings: [],
    failures: [],
    unknown: []
  };

  for (const [pkg, info] of Object.entries(licenses)) {
    // Normalize: `licenses` may be a single SPDX string or an array.
    const declared = Array.isArray(info.licenses) ? info.licenses : [info.licenses];

    for (const license of declared) {
      if (policy.blocked.includes(license)) {
        results.failures.push({
          package: pkg,
          license,
          reason: 'License is blocked by policy'
        });
      } else if (policy.conditional.includes(license)) {
        results.warnings.push({
          package: pkg,
          license,
          reason: 'License requires legal review'
        });
      } else if (policy.allowed.includes(license)) {
        results.passed.push({ package: pkg, license });
      } else {
        results.unknown.push({
          package: pkg,
          license,
          reason: 'Unknown license - requires review'
        });
      }
    }
  }

  // Output results
  console.log('\n=== License Compliance Report ===\n');
  console.log(`✅ Passed: ${results.passed.length} packages`);
  console.log(`⚠️ Warnings: ${results.warnings.length} packages`);
  console.log(`❌ Failures: ${results.failures.length} packages`);
  console.log(`❓ Unknown: ${results.unknown.length} packages`);

  if (results.failures.length > 0) {
    console.log('\n--- Blocked Licenses ---');
    results.failures.forEach(f => {
      console.log(`  ❌ ${f.package}: ${f.license}`);
    });
  }

  if (results.warnings.length > 0) {
    console.log('\n--- Licenses Requiring Review ---');
    results.warnings.forEach(w => {
      console.log(`  ⚠️ ${w.package}: ${w.license}`);
    });
  }

  // Exit code: blocked licenses always fail; unknown licenses fail only
  // when the policy says so.
  const unknownFails =
    policy.unknownAction === 'fail' && results.unknown.length > 0;
  process.exit(results.failures.length > 0 || unknownFails ? 1 : 0);
}

// Run
const licensesFile = process.argv[2] || 'licenses.json';
checkLicenses(licensesFile);

// SBOM Generation and Management
CycloneDX SBOM Generator
# sbom_generator.py
import json
import hashlib
from datetime import datetime
from typing import List, Dict
import uuid
class SBOMGenerator:
    """Generate a CycloneDX 1.5 SBOM document from a flat dependency list."""

    def __init__(self, project_name: str, version: str):
        self.project_name = project_name
        self.version = version

    def generate(self, dependencies: List[Dict]) -> Dict:
        """Generate a CycloneDX-format SBOM dict.

        Each entry of *dependencies* is a dict with at least ``name`` and
        ``version``; optional keys: ``ecosystem``, ``direct``, ``license``,
        ``integrity``, ``repository``, ``dependencies``.
        """
        from datetime import timezone  # local: module header imports only datetime

        sbom = {
            "bomFormat": "CycloneDX",
            "specVersion": "1.5",
            "serialNumber": f"urn:uuid:{uuid.uuid4()}",
            "version": 1,
            "metadata": {
                # Aware "now" rendered in the classic UTC-with-Z form;
                # datetime.utcnow() is deprecated since Python 3.12.
                "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
                "tools": [{
                    "vendor": "Custom",
                    "name": "sbom-generator",
                    "version": "1.0.0"
                }],
                "component": {
                    "type": "application",
                    "name": self.project_name,
                    "version": self.version,
                    "bom-ref": f"pkg:{self.project_name}@{self.version}"
                }
            },
            "components": [],
            "dependencies": []
        }

        for dep in dependencies:
            component = self._create_component(dep)
            sbom["components"].append(component)
            # Dependency relationships.
            # NOTE(review): assumes child entries are dicts with name/version
            # like their parents — confirm against the resolver's output shape.
            sbom["dependencies"].append({
                "ref": component["bom-ref"],
                "dependsOn": [
                    self._create_purl(d) for d in dep.get("dependencies", [])
                ]
            })
        return sbom

    def _create_component(self, dep: Dict) -> Dict:
        """Create a CycloneDX ``component`` entry for one dependency."""
        purl = self._create_purl(dep)
        component = {
            "type": "library",
            "bom-ref": purl,
            "name": dep["name"],
            "version": dep["version"],
            "purl": purl,
            # CycloneDX scope: direct deps are "required", others "optional".
            "scope": "required" if dep.get("direct", True) else "optional"
        }
        # Declared license (SPDX id), when known.
        if dep.get("license"):
            component["licenses"] = [{
                "license": {
                    "id": dep["license"]
                }
            }]
        # Integrity hash (npm SRI strings carry a "sha512-" prefix).
        if dep.get("integrity"):
            component["hashes"] = [{
                "alg": "SHA-512",
                "content": dep["integrity"].replace("sha512-", "")
            }]
        # Link back to the source repository.
        if dep.get("repository"):
            component["externalReferences"] = [{
                "type": "vcs",
                "url": dep["repository"]
            }]
        return component

    def _create_purl(self, dep: Dict) -> str:
        """Create a Package URL (PURL) for *dep*.

        For scoped npm packages the scope marker ``@`` must be kept and
        percent-encoded — ``pkg:npm/%40scope/name@version`` — per the purl
        specification (the original silently dropped it). ``split("/", 1)``
        also avoids a ValueError on names containing extra slashes.
        """
        ecosystem = dep.get("ecosystem", "npm")
        name = dep["name"]
        version = dep["version"]
        if name.startswith("@") and "/" in name:
            namespace, pkg_name = name[1:].split("/", 1)
            return f"pkg:{ecosystem}/%40{namespace}/{pkg_name}@{version}"
        return f"pkg:{ecosystem}/{name}@{version}"

    def export_json(self, sbom: Dict, filepath: str):
        """Write the SBOM to *filepath* as pretty-printed JSON."""
        with open(filepath, 'w') as f:
            json.dump(sbom, f, indent=2)

    def export_xml(self, sbom: Dict, filepath: str):
        """Write a minimal XML rendering of the SBOM (subset of the schema)."""
        import xml.etree.ElementTree as ET
        root = ET.Element("bom")
        root.set("xmlns", "http://cyclonedx.org/schema/bom/1.5")
        root.set("version", str(sbom["version"]))
        root.set("serialNumber", sbom["serialNumber"])
        # Metadata (timestamp only in this minimal export).
        metadata = ET.SubElement(root, "metadata")
        timestamp = ET.SubElement(metadata, "timestamp")
        timestamp.text = sbom["metadata"]["timestamp"]
        # Components: name/version/purl per component.
        components = ET.SubElement(root, "components")
        for comp in sbom["components"]:
            component = ET.SubElement(components, "component")
            component.set("type", comp["type"])
            component.set("bom-ref", comp["bom-ref"])
            name = ET.SubElement(component, "name")
            name.text = comp["name"]
            version = ET.SubElement(component, "version")
            version.text = comp["version"]
            purl = ET.SubElement(component, "purl")
            purl.text = comp["purl"]
        tree = ET.ElementTree(root)
        tree.write(filepath, encoding="utf-8", xml_declaration=True)


# Vulnerability Remediation
Automated Dependency Updates
# .github/dependabot.yml
version: 2
updates:
  # npm dependencies
  - package-ecosystem: "npm"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "monday"
      time: "06:00"
    open-pull-requests-limit: 10
    groups:
      # Group minor/patch updates
      production-dependencies:
        dependency-type: "production"
        update-types:
          - "minor"
          - "patch"
      development-dependencies:
        dependency-type: "development"
        update-types:
          - "minor"
          - "patch"
    # Security updates are always separate
    ignore:
      - dependency-name: "*"
        update-types: ["version-update:semver-major"]
    labels:
      - "dependencies"
      - "automated"
    commit-message:
      prefix: "deps"
      include: "scope"

  # Python dependencies
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 5

  # Docker base images
  - package-ecosystem: "docker"
    directory: "/"
    schedule:
      interval: "weekly"

  # GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"

# Vulnerability Fix Automation
// scripts/auto-fix-vulnerabilities.js
const { execSync } = require('child_process');
const fs = require('fs');

/**
 * Run `npm audit`, auto-apply non-breaking fixes, and report remaining
 * vulnerabilities that need manual remediation. Exits 1 when critical/high
 * issues remain unfixed.
 */
async function autoFixVulnerabilities() {
  console.log('🔍 Scanning for vulnerabilities...\n');

  // `npm audit` exits non-zero when it finds vulnerabilities, so the JSON
  // report usually arrives on the thrown error's stdout rather than the
  // normal return value.
  let auditResult;
  try {
    auditResult = JSON.parse(
      execSync('npm audit --json', { encoding: 'utf8' })
    );
  } catch (e) {
    // Guard: if npm itself failed (not installed, no package.json), stdout
    // may be missing entirely — the original crashed on JSON.parse(undefined).
    const out = e && e.stdout ? e.stdout.toString() : '';
    if (!out) {
      console.error('❌ npm audit produced no output:', e.message);
      process.exit(1);
    }
    auditResult = JSON.parse(out);
  }

  const vulnerabilities = auditResult.vulnerabilities || {};
  const fixable = [];
  const manual = [];

  // Partition into auto-fixable vs manual-remediation buckets.
  for (const [pkg, data] of Object.entries(vulnerabilities)) {
    if (data.fixAvailable) {
      if (typeof data.fixAvailable === 'boolean') {
        fixable.push(pkg);
      } else if (!data.fixAvailable.isSemVerMajor) {
        // Non-breaking fix available
        fixable.push(pkg);
      } else {
        // Major version bump required
        manual.push({
          package: pkg,
          severity: data.severity,
          currentVersion: data.range,
          fixVersion: data.fixAvailable.version,
          breaking: true
        });
      }
    } else {
      manual.push({
        package: pkg,
        severity: data.severity,
        reason: 'No fix available'
      });
    }
  }

  // Apply automatic fixes
  if (fixable.length > 0) {
    console.log(`\n🔧 Attempting to fix ${fixable.length} vulnerabilities...\n`);
    try {
      execSync('npm audit fix', { stdio: 'inherit' });
      console.log('\n✅ Automatic fixes applied successfully\n');
    } catch (e) {
      console.error('⚠️ Some fixes could not be applied automatically\n');
    }
  }

  // Report manual fixes needed
  if (manual.length > 0) {
    console.log('\n📋 Manual remediation required:\n');
    manual.forEach(item => {
      const emoji = {
        critical: '🔴',
        high: '🟠',
        moderate: '🟡',
        low: '🟢'
      }[item.severity] || '⚪';
      console.log(`${emoji} ${item.package}`);
      console.log(`   Severity: ${item.severity}`);
      if (item.breaking) {
        console.log(`   Fix: Upgrade to ${item.fixVersion} (BREAKING)`);
      } else if (item.reason) {
        console.log(`   Note: ${item.reason}`);
      }
      console.log('');
    });
  }

  // Generate report
  const report = {
    timestamp: new Date().toISOString(),
    summary: {
      total: Object.keys(vulnerabilities).length,
      autoFixed: fixable.length,
      manualRequired: manual.length
    },
    manualFixes: manual
  };
  fs.writeFileSync(
    'vulnerability-fix-report.json',
    JSON.stringify(report, null, 2)
  );
  console.log('📝 Report saved to vulnerability-fix-report.json');

  // Exit with error if critical/high vulnerabilities remain
  const criticalRemaining = manual.filter(
    m => m.severity === 'critical' || m.severity === 'high'
  );
  if (criticalRemaining.length > 0) {
    console.error(`\n❌ ${criticalRemaining.length} critical/high vulnerabilities require manual fixes`);
    process.exit(1);
  }
}

autoFixVulnerabilities().catch(console.error);

// Policy Enforcement
SCA Policy Configuration
# sca-policy.yaml
version: "1.0"

vulnerabilities:
  # Fail build on these severities
  fail_on:
    - critical
    - high

  # Allow with conditions
  exceptions:
    # Exceptions require justification and expiry
    - cve: "CVE-2023-12345"
      package: "example-package"
      reason: "No fix available, mitigated by WAF rules"
      expires: "2025-06-01"
      approved_by: "security-team"

  # Auto-fix settings
  auto_fix:
    enabled: true
    max_severity: "moderate"  # Only auto-fix up to moderate
    exclude_breaking: true

licenses:
  # Approved licenses
  allowed:
    - MIT
    - Apache-2.0
    - BSD-2-Clause
    - BSD-3-Clause
    - ISC
    - CC0-1.0
    - Unlicense

  # Conditional approval (require review)
  conditional:
    - LGPL-2.1
    - LGPL-3.0
    - MPL-2.0

  # Blocked licenses
  blocked:
    - GPL-2.0
    - GPL-3.0
    - AGPL-3.0
    - SSPL-1.0

  # Action for unknown licenses
  unknown_action: warn  # fail, warn, allow

packages:
  # Banned packages (known security issues or abandoned)
  banned:
    - event-stream  # Compromised in 2018
    - left-pad  # Historic incident
    - ua-parser-js  # Supply chain attack

  # Deprecated packages requiring migration
  deprecated:
    - request:
        replacement: "axios or node-fetch"
        deadline: "2025-01-01"
    - moment:
        replacement: "date-fns or dayjs"
        deadline: "2025-06-01"

  # Require specific versions
  pinned:
    - lodash: ">=4.17.21"  # Security fixes

age_policy:
  # Warn on packages not updated in X months
  warn_if_stale_months: 24
  # Fail on packages not updated in X months
  fail_if_stale_months: 36

maintainer_policy:
  # Warn if package has fewer than X maintainers
  min_maintainers: 2
  # Warn if main maintainer inactive for X months
  maintainer_active_months: 12

# Summary
Effective SCA implementation requires:
- Automated scanning in CI/CD pipelines
- License compliance enforcement
- SBOM generation for supply chain transparency
- Vulnerability remediation workflows
- Policy-as-code for consistent enforcement
Integrate these practices to secure your open source supply chain while maintaining development velocity. Regular reviews of policies and exceptions ensure continued alignment with security requirements.