Modern applications typically comprise 70-90% open source code, making Software Composition Analysis (SCA) essential for security. This guide covers implementing comprehensive SCA in your DevSecOps pipeline.
Understanding SCA Components
Dependency Analysis Architecture
# sca_analyzer.py
import json
from dataclasses import dataclass, field
from datetime import datetime, timezone
from enum import Enum
from typing import Dict, List, Optional, Set
class SeverityLevel(Enum):
    """Normalized severity buckets used for vulnerability tallies."""

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"
class LicenseRisk(Enum):
    """Coarse license-obligation categories for policy decisions."""

    PERMISSIVE = "permissive"            # MIT, Apache, BSD
    WEAK_COPYLEFT = "weak_copyleft"      # LGPL, MPL
    STRONG_COPYLEFT = "strong_copyleft"  # GPL, AGPL
    COMMERCIAL = "commercial"
    UNKNOWN = "unknown"
@dataclass
class Vulnerability:
    """A single known vulnerability affecting a dependency version range."""

    cve_id: str
    severity: SeverityLevel
    cvss_score: float
    description: str
    affected_versions: str            # version-range expression, as reported
    fixed_version: Optional[str]      # None when no fixed release exists
    references: List[str] = field(default_factory=list)  # advisory URLs
    exploitability: str = "unknown"   # free-form exploit-maturity label
@dataclass
class License:
    """License identity plus the obligations it imposes."""

    name: str                      # human-readable name, e.g. "Apache 2.0"
    spdx_id: str                   # canonical SPDX identifier
    risk_level: LicenseRisk
    requires_attribution: bool     # must credit the original authors
    requires_disclosure: bool      # must disclose source (copyleft)
    commercial_use_allowed: bool
@dataclass
class Dependency:
    """A single resolved package in the dependency graph.

    ``raw_license`` carries the license identifier as reported by the
    package registry; it was previously an undeclared dynamic attribute
    probed with ``hasattr`` by ``SCAAnalyzer._identify_license``.
    Declaring it here (defaulting to None) keeps the dataclass honest
    while remaining backward compatible.
    """

    name: str
    version: str
    ecosystem: str                # e.g. "npm", "pypi", "maven"
    direct: bool                  # True for direct, False for transitive
    license: Optional[License]
    vulnerabilities: List[Vulnerability] = field(default_factory=list)
    dependencies: List[str] = field(default_factory=list)  # child package names
    raw_license: Optional[str] = None  # license id as reported by the registry
@dataclass
class SCAReport:
    """Aggregated output of one SCA scan run."""

    project_name: str
    scan_timestamp: str                           # ISO-8601 scan time
    total_dependencies: int
    direct_dependencies: int
    transitive_dependencies: int
    vulnerabilities_by_severity: Dict[str, int]   # severity value -> count
    license_breakdown: Dict[str, int]             # SPDX id -> count
    dependencies: List[Dependency]
    policy_violations: List[str]                  # human-readable messages
class SCAAnalyzer:
    """Core SCA analysis engine.

    Enriches each dependency with vulnerability and license data, evaluates
    the result against a policy mapping, and emits an :class:`SCAReport`.
    """

    # License classification table, keyed by SPDX id.
    LICENSE_MAP = {
        "MIT": License("MIT", "MIT", LicenseRisk.PERMISSIVE, True, False, True),
        "Apache-2.0": License("Apache 2.0", "Apache-2.0", LicenseRisk.PERMISSIVE, True, False, True),
        "BSD-3-Clause": License("BSD 3-Clause", "BSD-3-Clause", LicenseRisk.PERMISSIVE, True, False, True),
        "ISC": License("ISC", "ISC", LicenseRisk.PERMISSIVE, True, False, True),
        "LGPL-2.1": License("LGPL 2.1", "LGPL-2.1", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "LGPL-3.0": License("LGPL 3.0", "LGPL-3.0", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "MPL-2.0": License("MPL 2.0", "MPL-2.0", LicenseRisk.WEAK_COPYLEFT, True, True, True),
        "GPL-2.0": License("GPL 2.0", "GPL-2.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
        "GPL-3.0": License("GPL 3.0", "GPL-3.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
        "AGPL-3.0": License("AGPL 3.0", "AGPL-3.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
    }

    def __init__(self, vulnerability_db, policy_config, project_name: str = ""):
        """
        Args:
            vulnerability_db: object exposing ``lookup(ecosystem, name, version)``
                that returns a list of :class:`Vulnerability`.
            policy_config: mapping with optional keys ``allow_critical``,
                ``blocked_licenses`` and ``banned_packages``.
            project_name: stamped into generated reports. Defaults to "" for
                backward compatibility (previously hard-coded in the report).
        """
        self.vuln_db = vulnerability_db
        self.policy = policy_config
        self.project_name = project_name

    def analyze(self, dependencies: List[Dependency]) -> SCAReport:
        """Perform full SCA analysis (mutates the given dependencies)."""
        # Enrich with vulnerability and license data.
        for dep in dependencies:
            dep.vulnerabilities = self.vuln_db.lookup(
                dep.ecosystem, dep.name, dep.version
            )
            dep.license = self._identify_license(dep)
        violations = self._check_policy_violations(dependencies)
        return self._generate_report(dependencies, violations)

    def _identify_license(self, dep: Dependency) -> Optional[License]:
        """Classify the dependency's declared license, if recognized.

        A registry lookup would normally populate ``raw_license``; ids not
        present in LICENSE_MAP classify as None (unknown).
        """
        license_id = getattr(dep, "raw_license", None)
        return self.LICENSE_MAP.get(license_id)

    def _check_policy_violations(self, dependencies: List[Dependency]) -> List[str]:
        """Return human-readable policy-violation messages."""
        violations = []
        # Hoist policy lookups out of the per-dependency loop.
        allow_critical = self.policy.get('allow_critical', False)
        blocked = self.policy.get('blocked_licenses', [])
        banned = self.policy.get('banned_packages', [])
        for dep in dependencies:
            # Vulnerability severity threshold (only CRITICAL is gated here).
            if not allow_critical:
                for vuln in dep.vulnerabilities:
                    if vuln.severity == SeverityLevel.CRITICAL:
                        violations.append(
                            f"CRITICAL vulnerability {vuln.cve_id} in {dep.name}@{dep.version}"
                        )
            # License compliance.
            if dep.license and dep.license.spdx_id in blocked:
                violations.append(
                    f"Blocked license {dep.license.spdx_id} in {dep.name}"
                )
            # Deprecated/banned packages.
            if dep.name in banned:
                violations.append(f"Banned package: {dep.name}")
        return violations

    def _generate_report(
        self,
        dependencies: List[Dependency],
        violations: List[str],
    ) -> SCAReport:
        """Aggregate per-dependency data into an :class:`SCAReport`."""
        # Tally vulnerabilities per severity bucket.
        vuln_counts = {s.value: 0 for s in SeverityLevel}
        for dep in dependencies:
            for vuln in dep.vulnerabilities:
                vuln_counts[vuln.severity.value] += 1
        # Tally licenses per SPDX id.
        license_counts: Dict[str, int] = {}
        for dep in dependencies:
            if dep.license:
                spdx = dep.license.spdx_id
                license_counts[spdx] = license_counts.get(spdx, 0) + 1
        direct = sum(1 for d in dependencies if d.direct)
        return SCAReport(
            project_name=self.project_name,
            # datetime/timezone imported at module top; the original called
            # datetime.utcnow() without any datetime import (NameError).
            scan_timestamp=datetime.now(timezone.utc).isoformat(),
            total_dependencies=len(dependencies),
            direct_dependencies=direct,
            transitive_dependencies=len(dependencies) - direct,
            vulnerabilities_by_severity=vuln_counts,
            license_breakdown=license_counts,
            dependencies=dependencies,
            policy_violations=violations,
        )


# CI/CD Pipeline Integration
GitHub Actions SCA Workflow
# .github/workflows/sca-scan.yml
name: Software Composition Analysis

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 6 * * *'  # Daily at 6 AM (UTC)

jobs:
  dependency-scan:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      security-events: write
    steps:
      - uses: actions/checkout@v4

      # npm/Node.js scanning
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install Dependencies
        run: npm ci

      - name: npm audit
        run: |
          npm audit --json > npm-audit.json || true
          node scripts/parse-npm-audit.js npm-audit.json

      # Snyk scanning
      # NOTE(review): prefer pinning third-party actions to a release tag or
      # commit SHA instead of a moving @master/@main ref.
      - name: Run Snyk to check for vulnerabilities
        uses: snyk/actions/node@master
        continue-on-error: true
        env:
          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
        with:
          args: --severity-threshold=high --json-file-output=snyk-results.json

      # OWASP Dependency Check
      - name: OWASP Dependency Check
        uses: dependency-check/Dependency-Check_Action@main
        with:
          project: 'my-project'
          path: '.'
          format: 'JSON'
          out: 'dependency-check-report'
          args: >
            --suppression suppression.xml
            --failOnCVSS 7

      # Trivy for container dependencies
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: 'fs'
          scan-ref: '.'
          format: 'sarif'
          output: 'trivy-results.sarif'

      # Upload to GitHub Security.
      # codeql-action v2 is deprecated/retired; v3 is the supported line.
      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'

      # License scanning
      - name: License Compliance Check
        run: |
          npx license-checker --json --out licenses.json
          node scripts/check-license-compliance.js licenses.json

      # Generate SBOM
      - name: Generate SBOM
        run: |
          npx @cyclonedx/cyclonedx-npm --output-file sbom.json

      - name: Upload SBOM
        uses: actions/upload-artifact@v4
        with:
          name: sbom
          path: sbom.json

  python-scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          pip install safety pip-audit

      - name: Safety Check
        run: |
          pip freeze > requirements-lock.txt
          safety check -r requirements-lock.txt --json > safety-report.json || true

      - name: pip-audit
        run: |
          pip-audit --format json --output pip-audit.json || true

      - name: Upload Python scan results
        uses: actions/upload-artifact@v4
        with:
          name: python-security-reports
          path: |
            safety-report.json
            pip-audit.json

# License Compliance Checker
// scripts/check-license-compliance.js
const fs = require('fs');

// Policy configuration
const policy = {
  allowed: [
    'MIT',
    'Apache-2.0',
    'BSD-2-Clause',
    'BSD-3-Clause',
    'ISC',
    'CC0-1.0',
    '0BSD',
    'Unlicense'
  ],
  conditional: [
    // Allowed but require review
    'LGPL-2.1',
    'LGPL-3.0',
    'MPL-2.0'
  ],
  blocked: [
    'GPL-2.0',
    'GPL-3.0',
    'AGPL-3.0',
    'SSPL-1.0',
    'BUSL-1.1'
  ],
  unknownAction: 'warn' // 'fail', 'warn', or 'allow'
};

/**
 * Classify every package in a license-checker JSON report against the
 * policy, print a summary, and exit non-zero on violations.
 *
 * Fixes over the original:
 *  - license-checker may report `licenses` as an array for multi-licensed
 *    packages; each entry is now classified individually.
 *  - `policy.unknownAction` was declared but never consulted; 'fail' now
 *    turns unknown licenses into a non-zero exit.
 */
function checkLicenses(licensesFile) {
  const licenses = JSON.parse(fs.readFileSync(licensesFile, 'utf8'));
  const results = {
    passed: [],
    warnings: [],
    failures: [],
    unknown: []
  };

  for (const [pkg, info] of Object.entries(licenses)) {
    // Normalize: `licenses` may be a single SPDX string or an array.
    const declared = Array.isArray(info.licenses) ? info.licenses : [info.licenses];

    for (const license of declared) {
      if (policy.blocked.includes(license)) {
        results.failures.push({
          package: pkg,
          license,
          reason: 'License is blocked by policy'
        });
      } else if (policy.conditional.includes(license)) {
        results.warnings.push({
          package: pkg,
          license,
          reason: 'License requires legal review'
        });
      } else if (policy.allowed.includes(license)) {
        results.passed.push({ package: pkg, license });
      } else {
        results.unknown.push({
          package: pkg,
          license,
          reason: 'Unknown license - requires review'
        });
      }
    }
  }

  // Output results
  console.log('\n=== License Compliance Report ===\n');
  console.log(`✅ Passed: ${results.passed.length} packages`);
  console.log(`⚠️ Warnings: ${results.warnings.length} packages`);
  console.log(`❌ Failures: ${results.failures.length} packages`);
  console.log(`❓ Unknown: ${results.unknown.length} packages`);

  if (results.failures.length > 0) {
    console.log('\n--- Blocked Licenses ---');
    results.failures.forEach(f => {
      console.log(`  ❌ ${f.package}: ${f.license}`);
    });
  }

  if (results.warnings.length > 0) {
    console.log('\n--- Licenses Requiring Review ---');
    results.warnings.forEach(w => {
      console.log(`  ⚠️ ${w.package}: ${w.license}`);
    });
  }

  // Exit code: blocked licenses always fail; unknown licenses fail only
  // when the policy says so.
  const unknownFails =
    policy.unknownAction === 'fail' && results.unknown.length > 0;
  process.exit(results.failures.length > 0 || unknownFails ? 1 : 0);
}

// Run
const licensesFile = process.argv[2] || 'licenses.json';
checkLicenses(licensesFile);

// SBOM Generation and Management
CycloneDX SBOM Generator
# sbom_generator.py
import json
import hashlib
from datetime import datetime
from typing import List, Dict
import uuid
class SBOMGenerator:
    """Generate a CycloneDX 1.5 SBOM document from a flat dependency list."""

    def __init__(self, project_name: str, version: str):
        self.project_name = project_name
        self.version = version

    def generate(self, dependencies: List[Dict]) -> Dict:
        """Generate a CycloneDX-format SBOM dict.

        Each entry of *dependencies* is a dict with at least ``name`` and
        ``version``; optional keys: ``ecosystem``, ``direct``, ``license``,
        ``integrity``, ``repository``, ``dependencies``.
        """
        from datetime import timezone  # local: module header imports only datetime

        sbom = {
            "bomFormat": "CycloneDX",
            "specVersion": "1.5",
            "serialNumber": f"urn:uuid:{uuid.uuid4()}",
            "version": 1,
            "metadata": {
                # Aware "now" rendered in the classic UTC-with-Z form;
                # datetime.utcnow() is deprecated since Python 3.12.
                "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
                "tools": [{
                    "vendor": "Custom",
                    "name": "sbom-generator",
                    "version": "1.0.0"
                }],
                "component": {
                    "type": "application",
                    "name": self.project_name,
                    "version": self.version,
                    "bom-ref": f"pkg:{self.project_name}@{self.version}"
                }
            },
            "components": [],
            "dependencies": []
        }

        for dep in dependencies:
            component = self._create_component(dep)
            sbom["components"].append(component)
            # Dependency relationships.
            # NOTE(review): assumes child entries are dicts with name/version
            # like their parents — confirm against the resolver's output shape.
            sbom["dependencies"].append({
                "ref": component["bom-ref"],
                "dependsOn": [
                    self._create_purl(d) for d in dep.get("dependencies", [])
                ]
            })
        return sbom

    def _create_component(self, dep: Dict) -> Dict:
        """Create a CycloneDX ``component`` entry for one dependency."""
        purl = self._create_purl(dep)
        component = {
            "type": "library",
            "bom-ref": purl,
            "name": dep["name"],
            "version": dep["version"],
            "purl": purl,
            # CycloneDX scope: direct deps are "required", others "optional".
            "scope": "required" if dep.get("direct", True) else "optional"
        }
        # Declared license (SPDX id), when known.
        if dep.get("license"):
            component["licenses"] = [{
                "license": {
                    "id": dep["license"]
                }
            }]
        # Integrity hash (npm SRI strings carry a "sha512-" prefix).
        if dep.get("integrity"):
            component["hashes"] = [{
                "alg": "SHA-512",
                "content": dep["integrity"].replace("sha512-", "")
            }]
        # Link back to the source repository.
        if dep.get("repository"):
            component["externalReferences"] = [{
                "type": "vcs",
                "url": dep["repository"]
            }]
        return component

    def _create_purl(self, dep: Dict) -> str:
        """Create a Package URL (PURL) for *dep*.

        For scoped npm packages the scope marker ``@`` must be kept and
        percent-encoded — ``pkg:npm/%40scope/name@version`` — per the purl
        specification (the original silently dropped it). ``split("/", 1)``
        also avoids a ValueError on names containing extra slashes.
        """
        ecosystem = dep.get("ecosystem", "npm")
        name = dep["name"]
        version = dep["version"]
        if name.startswith("@") and "/" in name:
            namespace, pkg_name = name[1:].split("/", 1)
            return f"pkg:{ecosystem}/%40{namespace}/{pkg_name}@{version}"
        return f"pkg:{ecosystem}/{name}@{version}"

    def export_json(self, sbom: Dict, filepath: str):
        """Write the SBOM to *filepath* as pretty-printed JSON."""
        with open(filepath, 'w') as f:
            json.dump(sbom, f, indent=2)

    def export_xml(self, sbom: Dict, filepath: str):
        """Write a minimal XML rendering of the SBOM (subset of the schema)."""
        import xml.etree.ElementTree as ET
        root = ET.Element("bom")
        root.set("xmlns", "http://cyclonedx.org/schema/bom/1.5")
        root.set("version", str(sbom["version"]))
        root.set("serialNumber", sbom["serialNumber"])
        # Metadata (timestamp only in this minimal export).
        metadata = ET.SubElement(root, "metadata")
        timestamp = ET.SubElement(metadata, "timestamp")
        timestamp.text = sbom["metadata"]["timestamp"]
        # Components: name/version/purl per component.
        components = ET.SubElement(root, "components")
        for comp in sbom["components"]:
            component = ET.SubElement(components, "component")
            component.set("type", comp["type"])
            component.set("bom-ref", comp["bom-ref"])
            name = ET.SubElement(component, "name")
            name.text = comp["name"]
            version = ET.SubElement(component, "version")
            version.text = comp["version"]
            purl = ET.SubElement(component, "purl")
            purl.text = comp["purl"]
        tree = ET.ElementTree(root)
        tree.write(filepath, encoding="utf-8", xml_declaration=True)


# Vulnerability Remediation
Automated Dependency Updates
# .github/dependabot.yml
version: 2
updates:
  # npm dependencies
  - package-ecosystem: "npm"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "monday"
      time: "06:00"
    open-pull-requests-limit: 10
    groups:
      # Group minor/patch updates
      production-dependencies:
        dependency-type: "production"
        update-types:
          - "minor"
          - "patch"
      development-dependencies:
        dependency-type: "development"
        update-types:
          - "minor"
          - "patch"
    # Security updates are always separate
    ignore:
      - dependency-name: "*"
        update-types: ["version-update:semver-major"]
    labels:
      - "dependencies"
      - "automated"
    commit-message:
      prefix: "deps"
      include: "scope"

  # Python dependencies
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 5

  # Docker base images
  - package-ecosystem: "docker"
    directory: "/"
    schedule:
      interval: "weekly"

  # GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"

# Vulnerability Fix Automation
// scripts/auto-fix-vulnerabilities.js
const { execSync } = require('child_process');
const fs = require('fs');

/**
 * Run `npm audit`, auto-apply non-breaking fixes, and report remaining
 * vulnerabilities that need manual remediation. Exits 1 when critical/high
 * issues remain unfixed.
 */
async function autoFixVulnerabilities() {
  console.log('🔍 Scanning for vulnerabilities...\n');

  // `npm audit` exits non-zero when it finds vulnerabilities, so the JSON
  // report usually arrives on the thrown error's stdout rather than the
  // normal return value.
  let auditResult;
  try {
    auditResult = JSON.parse(
      execSync('npm audit --json', { encoding: 'utf8' })
    );
  } catch (e) {
    // Guard: if npm itself failed (not installed, no package.json), stdout
    // may be missing entirely — the original crashed on JSON.parse(undefined).
    const out = e && e.stdout ? e.stdout.toString() : '';
    if (!out) {
      console.error('❌ npm audit produced no output:', e.message);
      process.exit(1);
    }
    auditResult = JSON.parse(out);
  }

  const vulnerabilities = auditResult.vulnerabilities || {};
  const fixable = [];
  const manual = [];

  // Partition into auto-fixable vs manual-remediation buckets.
  for (const [pkg, data] of Object.entries(vulnerabilities)) {
    if (data.fixAvailable) {
      if (typeof data.fixAvailable === 'boolean') {
        fixable.push(pkg);
      } else if (!data.fixAvailable.isSemVerMajor) {
        // Non-breaking fix available
        fixable.push(pkg);
      } else {
        // Major version bump required
        manual.push({
          package: pkg,
          severity: data.severity,
          currentVersion: data.range,
          fixVersion: data.fixAvailable.version,
          breaking: true
        });
      }
    } else {
      manual.push({
        package: pkg,
        severity: data.severity,
        reason: 'No fix available'
      });
    }
  }

  // Apply automatic fixes
  if (fixable.length > 0) {
    console.log(`\n🔧 Attempting to fix ${fixable.length} vulnerabilities...\n`);
    try {
      execSync('npm audit fix', { stdio: 'inherit' });
      console.log('\n✅ Automatic fixes applied successfully\n');
    } catch (e) {
      console.error('⚠️ Some fixes could not be applied automatically\n');
    }
  }

  // Report manual fixes needed
  if (manual.length > 0) {
    console.log('\n📋 Manual remediation required:\n');
    manual.forEach(item => {
      const emoji = {
        critical: '🔴',
        high: '🟠',
        moderate: '🟡',
        low: '🟢'
      }[item.severity] || '⚪';
      console.log(`${emoji} ${item.package}`);
      console.log(`   Severity: ${item.severity}`);
      if (item.breaking) {
        console.log(`   Fix: Upgrade to ${item.fixVersion} (BREAKING)`);
      } else if (item.reason) {
        console.log(`   Note: ${item.reason}`);
      }
      console.log('');
    });
  }

  // Generate report
  const report = {
    timestamp: new Date().toISOString(),
    summary: {
      total: Object.keys(vulnerabilities).length,
      autoFixed: fixable.length,
      manualRequired: manual.length
    },
    manualFixes: manual
  };
  fs.writeFileSync(
    'vulnerability-fix-report.json',
    JSON.stringify(report, null, 2)
  );
  console.log('📝 Report saved to vulnerability-fix-report.json');

  // Exit with error if critical/high vulnerabilities remain
  const criticalRemaining = manual.filter(
    m => m.severity === 'critical' || m.severity === 'high'
  );
  if (criticalRemaining.length > 0) {
    console.error(`\n❌ ${criticalRemaining.length} critical/high vulnerabilities require manual fixes`);
    process.exit(1);
  }
}

autoFixVulnerabilities().catch(console.error);

// Policy Enforcement
SCA Policy Configuration
# sca-policy.yaml
version: "1.0"

vulnerabilities:
  # Fail build on these severities
  fail_on:
    - critical
    - high

  # Allow with conditions
  exceptions:
    # Exceptions require justification and expiry
    - cve: "CVE-2023-12345"
      package: "example-package"
      reason: "No fix available, mitigated by WAF rules"
      expires: "2025-06-01"
      approved_by: "security-team"

  # Auto-fix settings
  auto_fix:
    enabled: true
    max_severity: "moderate"  # Only auto-fix up to moderate
    exclude_breaking: true

licenses:
  # Approved licenses
  allowed:
    - MIT
    - Apache-2.0
    - BSD-2-Clause
    - BSD-3-Clause
    - ISC
    - CC0-1.0
    - Unlicense

  # Conditional approval (require review)
  conditional:
    - LGPL-2.1
    - LGPL-3.0
    - MPL-2.0

  # Blocked licenses
  blocked:
    - GPL-2.0
    - GPL-3.0
    - AGPL-3.0
    - SSPL-1.0

  # Action for unknown licenses
  unknown_action: warn  # fail, warn, allow

packages:
  # Banned packages (known security issues or abandoned)
  banned:
    - event-stream  # Compromised in 2018
    - left-pad  # Historic incident
    - ua-parser-js  # Supply chain attack

  # Deprecated packages requiring migration
  deprecated:
    - request:
        replacement: "axios or node-fetch"
        deadline: "2025-01-01"
    - moment:
        replacement: "date-fns or dayjs"
        deadline: "2025-06-01"

  # Require specific versions
  pinned:
    - lodash: ">=4.17.21"  # Security fixes

age_policy:
  # Warn on packages not updated in X months
  warn_if_stale_months: 24
  # Fail on packages not updated in X months
  fail_if_stale_months: 36

maintainer_policy:
  # Warn if package has fewer than X maintainers
  min_maintainers: 2
  # Warn if main maintainer inactive for X months
  maintainer_active_months: 12

# Summary
Effective SCA implementation requires:
- Automated scanning in CI/CD pipelines
- License compliance enforcement
- SBOM generation for supply chain transparency
- Vulnerability remediation workflows
- Policy-as-code for consistent enforcement
Integrate these practices to secure your open source supply chain while maintaining development velocity. Regular reviews of policies and exceptions ensure continued alignment with security requirements.