DevSecOps

Software Composition Analysis (SCA): Securing Your Open Source Dependencies

DeviDevs Team
11 min read
#SCA#DevSecOps#open source security#dependency management#supply chain security

Modern applications are typically built from 70–90% open source code, which makes Software Composition Analysis (SCA) essential for security. This guide covers implementing comprehensive SCA in your DevSecOps pipeline.

Understanding SCA Components

Dependency Analysis Architecture

# sca_analyzer.py
import json

from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Dict, List, Optional, Set
 
class SeverityLevel(Enum):
    """Vulnerability severity buckets, mirroring common CVSS qualitative ratings."""
    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"
 
class LicenseRisk(Enum):
    """Coarse license-risk tiers used for policy decisions."""
    PERMISSIVE = "permissive"  # MIT, Apache, BSD
    WEAK_COPYLEFT = "weak_copyleft"  # LGPL, MPL
    STRONG_COPYLEFT = "strong_copyleft"  # GPL, AGPL
    COMMERCIAL = "commercial"  # proprietary / paid license terms
    UNKNOWN = "unknown"  # could not be classified
 
@dataclass
class Vulnerability:
    """A single known vulnerability affecting a range of dependency versions."""
    cve_id: str  # e.g. "CVE-2023-12345"
    severity: SeverityLevel
    cvss_score: float  # CVSS base score (0.0-10.0)
    description: str
    affected_versions: str  # version-range expression; format depends on the data source
    fixed_version: Optional[str]  # None when no fixed release exists
    references: List[str] = field(default_factory=list)  # advisory / patch URLs
    exploitability: str = "unknown"  # free-form exploit-maturity hint from the feed
 
@dataclass
class License:
    """A classified software license and its key obligations."""
    name: str  # human-readable name, e.g. "Apache 2.0"
    spdx_id: str  # SPDX identifier, e.g. "Apache-2.0"
    risk_level: LicenseRisk
    requires_attribution: bool  # must credit the original authors
    requires_disclosure: bool  # must disclose source of derivative works
    commercial_use_allowed: bool
 
@dataclass
class Dependency:
    """One resolved package in the project's dependency graph."""
    name: str
    version: str
    ecosystem: str  # npm, pypi, maven, etc.
    direct: bool  # Direct or transitive
    license: Optional[License]  # None until identified by the analyzer
    vulnerabilities: List[Vulnerability] = field(default_factory=list)
    dependencies: List[str] = field(default_factory=list)  # names of child dependencies
 
@dataclass
class SCAReport:
    """Aggregated result of one SCA scan of a single project."""
    project_name: str
    scan_timestamp: str  # ISO-8601 timestamp of when the scan ran
    total_dependencies: int
    direct_dependencies: int
    transitive_dependencies: int
    vulnerabilities_by_severity: Dict[str, int]  # SeverityLevel value -> count
    license_breakdown: Dict[str, int]  # SPDX id -> number of dependencies
    dependencies: List[Dependency]  # fully enriched dependency records
    policy_violations: List[str]  # human-readable violation messages
 
class SCAAnalyzer:
    """Core SCA analysis engine.

    Enriches a dependency list with vulnerability and license data, evaluates
    the configured policy, and produces an aggregate SCAReport.
    """

    # Classification of common SPDX license identifiers.
    # License(name, spdx_id, risk_level, requires_attribution,
    #         requires_disclosure, commercial_use_allowed)
    LICENSE_MAP = {
        "MIT": License("MIT", "MIT", LicenseRisk.PERMISSIVE, True, False, True),
        "Apache-2.0": License("Apache 2.0", "Apache-2.0", LicenseRisk.PERMISSIVE, True, False, True),
        "BSD-3-Clause": License("BSD 3-Clause", "BSD-3-Clause", LicenseRisk.PERMISSIVE, True, False, True),
        "ISC": License("ISC", "ISC", LicenseRisk.PERMISSIVE, True, False, True),
        "LGPL-2.1": License("LGPL 2.1", "LGPL-2.1", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "LGPL-3.0": License("LGPL 3.0", "LGPL-3.0", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "MPL-2.0": License("MPL 2.0", "MPL-2.0", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "GPL-2.0": License("GPL 2.0", "GPL-2.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
        "GPL-3.0": License("GPL 3.0", "GPL-3.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
        "AGPL-3.0": License("AGPL 3.0", "AGPL-3.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
    }

    def __init__(self, vulnerability_db, policy_config):
        """
        Args:
            vulnerability_db: provider exposing lookup(ecosystem, name, version)
                that returns a list of Vulnerability records.
            policy_config: dict-like policy; keys read here are
                'allow_critical', 'blocked_licenses', 'banned_packages'.
        """
        self.vuln_db = vulnerability_db
        self.policy = policy_config

    def analyze(self, dependencies: List[Dependency]) -> SCAReport:
        """Perform full SCA analysis: enrich, check policy, generate report.

        Note: mutates the passed Dependency objects in place (vulnerabilities
        and license fields).
        """
        for dep in dependencies:
            dep.vulnerabilities = self.vuln_db.lookup(
                dep.ecosystem, dep.name, dep.version
            )
            dep.license = self._identify_license(dep)

        violations = self._check_policy_violations(dependencies)
        return self._generate_report(dependencies, violations)

    def _identify_license(self, dep: Dependency) -> Optional[License]:
        """Map a dependency's raw license string to a classified License.

        A production implementation would query the package registry; here we
        read an optional 'raw_license' attribute set by the dependency parser.
        Returns None for unknown or missing identifiers.
        """
        license_id = getattr(dep, 'raw_license', None)
        return self.LICENSE_MAP.get(license_id)

    def _check_policy_violations(self, dependencies: List[Dependency]) -> List[str]:
        """Return human-readable policy violation messages for all dependencies."""
        violations = []

        for dep in dependencies:
            # Critical vulnerabilities fail the policy unless explicitly allowed.
            for vuln in dep.vulnerabilities:
                if (vuln.severity == SeverityLevel.CRITICAL
                        and not self.policy.get('allow_critical', False)):
                    violations.append(
                        f"CRITICAL vulnerability {vuln.cve_id} in {dep.name}@{dep.version}"
                    )

            # License compliance: spdx_id against the policy block-list.
            if dep.license and dep.license.spdx_id in self.policy.get('blocked_licenses', []):
                violations.append(
                    f"Blocked license {dep.license.spdx_id} in {dep.name}"
                )

            # Deprecated/banned packages.
            if dep.name in self.policy.get('banned_packages', []):
                violations.append(f"Banned package: {dep.name}")

        return violations

    def _generate_report(
        self,
        dependencies: List[Dependency],
        violations: List[str]
    ) -> SCAReport:
        """Generate comprehensive SCA report with severity and license rollups."""
        # Count vulnerabilities by severity bucket.
        vuln_counts = {s.value: 0 for s in SeverityLevel}
        for dep in dependencies:
            for vuln in dep.vulnerabilities:
                vuln_counts[vuln.severity.value] += 1

        # Count dependencies per identified SPDX license.
        license_counts: Dict[str, int] = {}
        for dep in dependencies:
            if dep.license:
                spdx = dep.license.spdx_id
                license_counts[spdx] = license_counts.get(spdx, 0) + 1

        return SCAReport(
            project_name="",  # TODO: thread the real project name through
            # Fix: original called datetime.utcnow() without importing datetime
            # (NameError at runtime); use a timezone-aware timestamp instead.
            scan_timestamp=datetime.now(timezone.utc).isoformat(),
            total_dependencies=len(dependencies),
            direct_dependencies=sum(1 for d in dependencies if d.direct),
            transitive_dependencies=sum(1 for d in dependencies if not d.direct),
            vulnerabilities_by_severity=vuln_counts,
            license_breakdown=license_counts,
            dependencies=dependencies,
            policy_violations=violations
        )

CI/CD Pipeline Integration

GitHub Actions SCA Workflow

# .github/workflows/sca-scan.yml
name: Software Composition Analysis

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 6 * * *'  # Daily at 6 AM UTC

jobs:
  dependency-scan:
    runs-on: ubuntu-latest
    # Least-privilege token: read code, write security events (SARIF upload).
    permissions:
      contents: read
      security-events: write

    steps:
      - uses: actions/checkout@v4

      # npm/Node.js scanning
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install Dependencies
        run: npm ci

      # `|| true` keeps the step green; the parser script decides pass/fail.
      - name: npm audit
        run: |
          npm audit --json > npm-audit.json || true
          node scripts/parse-npm-audit.js npm-audit.json

      # Snyk scanning
      # NOTE(review): '@master' is a mutable ref — pin to a tagged release or
      # commit SHA for supply-chain safety (ironic in an SCA pipeline).
      - name: Run Snyk to check for vulnerabilities
        uses: snyk/actions/node@master
        continue-on-error: true
        env:
          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
        with:
          args: --severity-threshold=high --json-file-output=snyk-results.json

      # OWASP Dependency Check
      - name: OWASP Dependency Check
        uses: dependency-check/Dependency-Check_Action@main
        with:
          project: 'my-project'
          path: '.'
          format: 'JSON'
          out: 'dependency-check-report'
          args: >
            --suppression suppression.xml
            --failOnCVSS 7

      # Trivy for container/filesystem dependencies
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: 'fs'
          scan-ref: '.'
          format: 'sarif'
          output: 'trivy-results.sarif'

      # Upload to GitHub Security
      - name: Upload Trivy results to GitHub Security
        # Fix: codeql-action v2 is deprecated and no longer supported;
        # v3 is the current major version.
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'

      # License scanning
      - name: License Compliance Check
        run: |
          npx license-checker --json --out licenses.json
          node scripts/check-license-compliance.js licenses.json

      # Generate SBOM (CycloneDX)
      - name: Generate SBOM
        run: |
          npx @cyclonedx/cyclonedx-npm --output-file sbom.json

      - name: Upload SBOM
        uses: actions/upload-artifact@v4
        with:
          name: sbom
          path: sbom.json

  python-scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install safety pip-audit

      # NOTE(review): `safety check` is deprecated upstream in favor of
      # `safety scan` — confirm installed CLI version before switching.
      - name: Safety Check
        run: |
          pip freeze > requirements-lock.txt
          safety check -r requirements-lock.txt --json > safety-report.json || true

      - name: pip-audit
        run: |
          pip-audit --format json --output pip-audit.json || true

      - name: Upload Python scan results
        uses: actions/upload-artifact@v4
        with:
          name: python-security-reports
          path: |
            safety-report.json
            pip-audit.json

License Compliance Checker

// scripts/check-license-compliance.js
const fs = require('fs');
 
// Policy configuration: licenses are matched by their SPDX identifier.
const policy = {
  // Approved outright — no review needed.
  allowed: [
    'MIT',
    'Apache-2.0',
    'BSD-2-Clause',
    'BSD-3-Clause',
    'ISC',
    'CC0-1.0',
    '0BSD',
    'Unlicense'
  ],
  conditional: [
    // Allowed but require review
    'LGPL-2.1',
    'LGPL-3.0',
    'MPL-2.0'
  ],
  // Never acceptable for this project's distribution model.
  blocked: [
    'GPL-2.0',
    'GPL-3.0',
    'AGPL-3.0',
    'SSPL-1.0',
    'BUSL-1.1'
  ],
  // How to treat licenses not found in any list above.
  unknownAction: 'warn'  // 'fail', 'warn', or 'allow'
};
 
// Classify every package's license(s) against the policy and print a report.
// Exits non-zero when blocked licenses are found, or when unknown licenses
// exist and policy.unknownAction is 'fail'.
function checkLicenses(licensesFile) {
  const licenses = JSON.parse(fs.readFileSync(licensesFile, 'utf8'));

  const results = {
    passed: [],
    warnings: [],
    failures: [],
    unknown: []
  };

  for (const [pkg, info] of Object.entries(licenses)) {
    // Fix: license-checker reports either a string or an array for
    // multi-licensed packages; the old string-only comparison silently
    // classified arrays as "unknown".
    const declared = Array.isArray(info.licenses) ? info.licenses : [info.licenses];

    if (declared.some(l => policy.blocked.includes(l))) {
      results.failures.push({
        package: pkg,
        license: info.licenses,
        reason: 'License is blocked by policy'
      });
    } else if (declared.some(l => policy.conditional.includes(l))) {
      results.warnings.push({
        package: pkg,
        license: info.licenses,
        reason: 'License requires legal review'
      });
    } else if (declared.every(l => policy.allowed.includes(l))) {
      results.passed.push({ package: pkg, license: info.licenses });
    } else {
      results.unknown.push({
        package: pkg,
        license: info.licenses,
        reason: 'Unknown license - requires review'
      });
    }
  }

  // Output results
  console.log('\n=== License Compliance Report ===\n');

  console.log(`✅ Passed: ${results.passed.length} packages`);
  console.log(`⚠️  Warnings: ${results.warnings.length} packages`);
  console.log(`❌ Failures: ${results.failures.length} packages`);
  console.log(`❓ Unknown: ${results.unknown.length} packages`);

  if (results.failures.length > 0) {
    console.log('\n--- Blocked Licenses ---');
    results.failures.forEach(f => {
      console.log(`  ❌ ${f.package}: ${f.license}`);
    });
  }

  if (results.warnings.length > 0) {
    console.log('\n--- Licenses Requiring Review ---');
    results.warnings.forEach(w => {
      console.log(`  ⚠️  ${w.package}: ${w.license}`);
    });
  }

  // Fix: policy.unknownAction was declared but never enforced — setting it
  // to 'fail' previously had no effect on the exit code.
  const unknownFails = policy.unknownAction === 'fail' && results.unknown.length > 0;
  const exitCode = (results.failures.length > 0 || unknownFails) ? 1 : 0;
  process.exit(exitCode);
}
 
// Entry point: licenses file path from the first CLI argument,
// defaulting to licenses.json in the working directory.
const licensesFile = process.argv[2] || 'licenses.json';
checkLicenses(licensesFile);

SBOM Generation and Management

CycloneDX SBOM Generator

# sbom_generator.py
import json
import hashlib
from datetime import datetime
from typing import List, Dict
import uuid
 
class SBOMGenerator:
    """Generate a CycloneDX 1.5 SBOM for a project and its dependencies."""

    def __init__(self, project_name: str, version: str):
        """
        Args:
            project_name: name of the application the SBOM describes.
            version: version string of that application.
        """
        self.project_name = project_name
        self.version = version

    def generate(self, dependencies: List[Dict]) -> Dict:
        """Generate a CycloneDX-format SBOM document.

        Args:
            dependencies: dicts with at least 'name' and 'version'; optional
                keys: 'ecosystem', 'direct', 'license', 'integrity',
                'repository', 'dependencies' (nested dep dicts or purl strings).

        Returns:
            The SBOM as a plain dict, ready for JSON/XML export.
        """
        from datetime import timezone  # local: keep module imports untouched

        sbom = {
            "bomFormat": "CycloneDX",
            "specVersion": "1.5",
            "serialNumber": f"urn:uuid:{uuid.uuid4()}",
            "version": 1,
            "metadata": {
                # Fix: datetime.utcnow() is deprecated; use an aware timestamp
                # and normalize "+00:00" to the "Z" suffix emitted before.
                "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                "tools": [{
                    "vendor": "Custom",
                    "name": "sbom-generator",
                    "version": "1.0.0"
                }],
                "component": {
                    "type": "application",
                    "name": self.project_name,
                    "version": self.version,
                    "bom-ref": f"pkg:{self.project_name}@{self.version}"
                }
            },
            "components": [],
            "dependencies": []
        }

        for dep in dependencies:
            component = self._create_component(dep)
            sbom["components"].append(component)

            # Record the dependency-graph edge for this component. Nested
            # entries may be dicts (build a purl) or pre-built purl strings
            # (robustness: the old code crashed on strings).
            sbom["dependencies"].append({
                "ref": component["bom-ref"],
                "dependsOn": [
                    self._create_purl(d) if isinstance(d, dict) else str(d)
                    for d in dep.get("dependencies", [])
                ]
            })

        return sbom

    def _create_component(self, dep: Dict) -> Dict:
        """Create a CycloneDX component entry for one dependency dict."""
        purl = self._create_purl(dep)

        component = {
            "type": "library",
            "bom-ref": purl,
            "name": dep["name"],
            "version": dep["version"],
            "purl": purl,
            # NOTE(review): maps transitive deps to scope "optional", which is
            # not quite CycloneDX's meaning of optional — confirm intent.
            "scope": "required" if dep.get("direct", True) else "optional"
        }

        # Add license if available (expects an SPDX id string).
        if dep.get("license"):
            component["licenses"] = [{
                "license": {
                    "id": dep["license"]
                }
            }]

        # Add hashes if available (npm-style "sha512-..." integrity strings).
        if dep.get("integrity"):
            component["hashes"] = [{
                "alg": "SHA-512",
                "content": dep["integrity"].replace("sha512-", "")
            }]

        # Add external references (source repository link).
        if dep.get("repository"):
            component["externalReferences"] = [{
                "type": "vcs",
                "url": dep["repository"]
            }]

        return component

    def _create_purl(self, dep: Dict) -> str:
        """Create a Package URL (PURL) for one dependency dict."""
        ecosystem = dep.get("ecosystem", "npm")
        name = dep["name"]
        version = dep["version"]

        # Handle scoped packages ("@scope/pkg"). Fix: split only on the first
        # "/" so names containing additional slashes don't raise ValueError.
        if name.startswith("@"):
            namespace, pkg_name = name[1:].split("/", 1)
            return f"pkg:{ecosystem}/{namespace}/{pkg_name}@{version}"

        return f"pkg:{ecosystem}/{name}@{version}"

    def export_json(self, sbom: Dict, filepath: str):
        """Export SBOM to a pretty-printed JSON file."""
        with open(filepath, 'w') as f:
            json.dump(sbom, f, indent=2)

    def export_xml(self, sbom: Dict, filepath: str):
        """Export SBOM to XML format (subset: metadata timestamp + components)."""
        import xml.etree.ElementTree as ET

        root = ET.Element("bom")
        root.set("xmlns", "http://cyclonedx.org/schema/bom/1.5")
        root.set("version", str(sbom["version"]))
        root.set("serialNumber", sbom["serialNumber"])

        # Add metadata
        metadata = ET.SubElement(root, "metadata")
        timestamp = ET.SubElement(metadata, "timestamp")
        timestamp.text = sbom["metadata"]["timestamp"]

        # Add components (name/version/purl only; licenses and hashes are
        # not serialized in this XML subset).
        components = ET.SubElement(root, "components")
        for comp in sbom["components"]:
            component = ET.SubElement(components, "component")
            component.set("type", comp["type"])
            component.set("bom-ref", comp["bom-ref"])

            name = ET.SubElement(component, "name")
            name.text = comp["name"]

            version = ET.SubElement(component, "version")
            version.text = comp["version"]

            purl = ET.SubElement(component, "purl")
            purl.text = comp["purl"]

        tree = ET.ElementTree(root)
        tree.write(filepath, encoding="utf-8", xml_declaration=True)

Vulnerability Remediation

Automated Dependency Updates

# .github/dependabot.yml
version: 2
updates:
  # npm dependencies
  - package-ecosystem: "npm"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "monday"
      time: "06:00"
    open-pull-requests-limit: 10
    groups:
      # Group minor/patch updates into one PR per dependency type
      production-dependencies:
        dependency-type: "production"
        update-types:
          - "minor"
          - "patch"
      development-dependencies:
        dependency-type: "development"
        update-types:
          - "minor"
          - "patch"
    # NOTE(review): `ignore` conditions also apply to Dependabot security
    # updates, so major-version security fixes are suppressed here too —
    # review advisories requiring a major bump manually.
    ignore:
      - dependency-name: "*"
        update-types: ["version-update:semver-major"]
    labels:
      - "dependencies"
      - "automated"
    commit-message:
      prefix: "deps"
      include: "scope"

  # Python dependencies
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 5

  # Docker base images
  - package-ecosystem: "docker"
    directory: "/"
    schedule:
      interval: "weekly"

  # GitHub Actions used by workflows
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"

Vulnerability Fix Automation

// scripts/auto-fix-vulnerabilities.js
const { execSync } = require('child_process');
const fs = require('fs');
 
// Console markers by npm audit severity; hoisted so the map is not rebuilt
// on every reported item.
const SEVERITY_EMOJI = {
  critical: '🔴',
  high: '🟠',
  moderate: '🟡',
  low: '🟢'
};

// Run `npm audit`, auto-apply non-breaking fixes, and report what still
// needs manual remediation. Exits 1 when critical/high issues remain.
async function autoFixVulnerabilities() {
  console.log('🔍 Scanning for vulnerabilities...\n');

  // npm audit exits non-zero when vulnerabilities exist, so the JSON output
  // may arrive via the thrown error's stdout.
  let auditResult;
  try {
    auditResult = JSON.parse(
      execSync('npm audit --json', { encoding: 'utf8' })
    );
  } catch (e) {
    auditResult = JSON.parse(e.stdout);
  }

  const vulnerabilities = auditResult.vulnerabilities || {};
  const fixable = [];
  const manual = [];

  for (const [pkg, data] of Object.entries(vulnerabilities)) {
    if (data.fixAvailable) {
      if (typeof data.fixAvailable === 'boolean') {
        fixable.push(pkg);
      } else if (!data.fixAvailable.isSemVerMajor) {
        // Non-breaking fix available
        fixable.push(pkg);
      } else {
        // Major version bump required — needs a human decision
        manual.push({
          package: pkg,
          severity: data.severity,
          currentVersion: data.range,
          fixVersion: data.fixAvailable.version,
          breaking: true
        });
      }
    } else {
      manual.push({
        package: pkg,
        severity: data.severity,
        reason: 'No fix available'
      });
    }
  }

  // Apply automatic fixes. Fix: only count them as applied when the command
  // actually succeeds (the old report claimed success even on failure).
  let autoFixedCount = 0;
  if (fixable.length > 0) {
    console.log(`\n🔧 Attempting to fix ${fixable.length} vulnerabilities...\n`);

    try {
      execSync('npm audit fix', { stdio: 'inherit' });
      autoFixedCount = fixable.length;
      console.log('\n✅ Automatic fixes applied successfully\n');
    } catch (e) {
      console.error('⚠️ Some fixes could not be applied automatically\n');
    }
  }

  // Report manual fixes needed
  if (manual.length > 0) {
    console.log('\n📋 Manual remediation required:\n');

    manual.forEach(item => {
      const emoji = SEVERITY_EMOJI[item.severity] || '⚪';

      console.log(`${emoji} ${item.package}`);
      console.log(`   Severity: ${item.severity}`);
      if (item.breaking) {
        console.log(`   Fix: Upgrade to ${item.fixVersion} (BREAKING)`);
      } else if (item.reason) {
        console.log(`   Note: ${item.reason}`);
      }
      console.log('');
    });
  }

  // Generate machine-readable report for downstream tooling.
  const report = {
    timestamp: new Date().toISOString(),
    summary: {
      total: Object.keys(vulnerabilities).length,
      autoFixed: autoFixedCount,
      manualRequired: manual.length
    },
    manualFixes: manual
  };

  fs.writeFileSync(
    'vulnerability-fix-report.json',
    JSON.stringify(report, null, 2)
  );

  console.log('📝 Report saved to vulnerability-fix-report.json');

  // Exit with error if critical/high vulnerabilities remain unaddressed.
  const criticalRemaining = manual.filter(
    m => m.severity === 'critical' || m.severity === 'high'
  );

  if (criticalRemaining.length > 0) {
    console.error(`\n❌ ${criticalRemaining.length} critical/high vulnerabilities require manual fixes`);
    process.exit(1);
  }
}
 
autoFixVulnerabilities().catch(console.error);

Policy Enforcement

SCA Policy Configuration

# sca-policy.yaml
version: "1.0"

vulnerabilities:
  # Fail build on these severities
  fail_on:
    - critical
    - high

  # Allow with conditions
  exceptions:
    # Exceptions require justification and expiry; the scanner should
    # re-flag an exception once its `expires` date has passed.
    - cve: "CVE-2023-12345"
      package: "example-package"
      reason: "No fix available, mitigated by WAF rules"
      expires: "2025-06-01"
      approved_by: "security-team"

  # Auto-fix settings
  auto_fix:
    enabled: true
    max_severity: "moderate"  # Only auto-fix up to moderate
    exclude_breaking: true  # never auto-apply semver-major upgrades

licenses:
  # Approved licenses
  allowed:
    - MIT
    - Apache-2.0
    - BSD-2-Clause
    - BSD-3-Clause
    - ISC
    - CC0-1.0
    - Unlicense

  # Conditional approval (require review)
  conditional:
    - LGPL-2.1
    - LGPL-3.0
    - MPL-2.0

  # Blocked licenses
  blocked:
    - GPL-2.0
    - GPL-3.0
    - AGPL-3.0
    - SSPL-1.0

  # Action for unknown licenses
  unknown_action: warn  # fail, warn, allow

packages:
  # Banned packages (known security issues or abandoned)
  banned:
    - event-stream  # Compromised in 2018
    - left-pad  # Historic incident
    - ua-parser-js  # Hijacked releases published in 2021; NOTE(review):
                    # patched versions exist — confirm a blanket ban is intended

  # Deprecated packages requiring migration
  deprecated:
    - request:
        replacement: "axios or node-fetch"
        deadline: "2025-01-01"
    - moment:
        replacement: "date-fns or dayjs"
        deadline: "2025-06-01"

  # Require specific versions
  pinned:
    - lodash: ">=4.17.21"  # Security fixes

age_policy:
  # Warn on packages not updated in X months
  warn_if_stale_months: 24
  # Fail on packages not updated in X months
  fail_if_stale_months: 36

maintainer_policy:
  # Warn if package has fewer than X maintainers
  min_maintainers: 2
  # Warn if main maintainer inactive for X months
  maintainer_active_months: 12

Summary

Effective SCA implementation requires:

  1. Automated scanning in CI/CD pipelines
  2. License compliance enforcement
  3. SBOM generation for supply chain transparency
  4. Vulnerability remediation workflows
  5. Policy-as-code for consistent enforcement

Integrate these practices to secure your open source supply chain while maintaining development velocity. Regular reviews of policies and exceptions ensure continued alignment with security requirements.

Weekly AI Security & Automation Digest

Get the latest on AI Security, workflow automation, secure integrations, and custom platform development delivered weekly.

No spam. Unsubscribe anytime.