Modern applications typically comprise 70-90% open source code, making Software Composition Analysis (SCA) essential for security. This guide covers implementing comprehensive SCA in your DevSecOps pipeline.
Understanding SCA Components
Dependency Analysis Architecture
# sca_analyzer.py
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
import json
from typing import Dict, List, Optional, Set
class SeverityLevel(Enum):
    """Vulnerability severity tiers, ordered from most to least urgent."""

    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"
class LicenseRisk(Enum):
    """Compliance risk buckets for open source licenses."""

    PERMISSIVE = "permissive"            # MIT, Apache, BSD
    WEAK_COPYLEFT = "weak_copyleft"      # LGPL, MPL
    STRONG_COPYLEFT = "strong_copyleft"  # GPL, AGPL
    COMMERCIAL = "commercial"
    UNKNOWN = "unknown"
@dataclass
class Vulnerability:
    """A single known vulnerability affecting a dependency."""

    cve_id: str                       # e.g. "CVE-2023-12345"
    severity: SeverityLevel
    cvss_score: float                 # CVSS base score (presumably 0.0-10.0 — confirm scale)
    description: str
    affected_versions: str            # version range the advisory applies to
    fixed_version: Optional[str]      # None when no fixed release exists
    references: List[str] = field(default_factory=list)  # advisory / writeup URLs
    exploitability: str = "unknown"   # free-form exploitability note — TODO confirm vocabulary
@dataclass
class License:
    """A software license and its compliance characteristics."""

    name: str                     # human-readable name, e.g. "Apache 2.0"
    spdx_id: str                  # SPDX identifier, e.g. "Apache-2.0"
    risk_level: LicenseRisk
    requires_attribution: bool    # must credit original authors
    requires_disclosure: bool     # must disclose source (copyleft terms)
    commercial_use_allowed: bool
@dataclass
class Dependency:
    """A resolved package in the project's dependency tree."""

    name: str
    version: str
    ecosystem: str   # npm, pypi, maven, etc.
    direct: bool     # True for direct dependencies, False for transitive
    license: Optional[License]   # None until identified/classified
    vulnerabilities: List[Vulnerability] = field(default_factory=list)
    dependencies: List[str] = field(default_factory=list)  # names of child packages
@dataclass
class SCAReport:
    """Aggregated output of one full SCA analysis run."""

    project_name: str
    scan_timestamp: str                           # ISO-8601 timestamp string
    total_dependencies: int
    direct_dependencies: int
    transitive_dependencies: int
    vulnerabilities_by_severity: Dict[str, int]   # severity value -> count
    license_breakdown: Dict[str, int]             # SPDX id -> count
    dependencies: List[Dependency]
    policy_violations: List[str]                  # human-readable violation messages
class SCAAnalyzer:
"""Core SCA analysis engine"""
# License classification
LICENSE_MAP = {
"MIT": License("MIT", "MIT", LicenseRisk.PERMISSIVE, True, False, True),
"Apache-2.0": License("Apache 2.0", "Apache-2.0", LicenseRisk.PERMISSIVE, True, False, True),
"BSD-3-Clause": License("BSD 3-Clause", "BSD-3-Clause", LicenseRisk.PERMISSIVE, True, False, True),
"ISC": License("ISC", "ISC", LicenseRisk.PERMISSIVE, True, False, True),
"LGPL-2.1": License("LGPL 2.1", "LGPL-2.1", LicenseRisk.WEAK_COPYLEFT, True, True, True),
"LGPL-3.0": License("LGPL 3.0", "LGPL-3.0", LicenseRisk.WEAK_COPYLEFT, True, True, True),
"MPL-2.0": License("MPL 2.0", "MPL-2.0", LicenseRisk.WEAK_COPYLEFT, True, True, True),
"GPL-2.0": License("GPL 2.0", "GPL-2.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
"GPL-3.0": License("GPL 3.0", "GPL-3.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
"AGPL-3.0": License("AGPL 3.0", "AGPL-3.0", LicenseRisk.STRONG_COPYLEFT, True, True, False),
}
def __init__(self, vulnerability_db, policy_config):
self.vuln_db = vulnerability_db
self.policy = policy_config
def analyze(self, dependencies: List[Dependency]) -> SCAReport:
"""Perform full SCA analysis"""
# Enrich with vulnerability data
for dep in dependencies:
dep.vulnerabilities = self.vuln_db.lookup(
dep.ecosystem, dep.name, dep.version
)
dep.license = self._identify_license(dep)
# Check policy violations
violations = self._check_policy_violations(dependencies)
# Generate report
return self._generate_report(dependencies, violations)
def _identify_license(self, dep: Dependency) -> Optional[License]:
"""Identify and classify license"""
# This would typically call package registry API
license_id = dep.raw_license if hasattr(dep, 'raw_license') else None
return self.LICENSE_MAP.get(license_id)
def _check_policy_violations(self, dependencies: List[Dependency]) -> List[str]:
"""Check dependencies against policy"""
violations = []
for dep in dependencies:
# Check vulnerability severity thresholds
for vuln in dep.vulnerabilities:
if vuln.severity == SeverityLevel.CRITICAL:
if not self.policy.get('allow_critical', False):
violations.append(
f"CRITICAL vulnerability {vuln.cve_id} in {dep.name}@{dep.version}"
)
# Check license compliance
if dep.license:
blocked = self.policy.get('blocked_licenses', [])
if dep.license.spdx_id in blocked:
violations.append(
f"Blocked license {dep.license.spdx_id} in {dep.name}"
)
# Check deprecated/banned packages
banned = self.policy.get('banned_packages', [])
if dep.name in banned:
violations.append(f"Banned package: {dep.name}")
return violations
def _generate_report(
self,
dependencies: List[Dependency],
violations: List[str]
) -> SCAReport:
"""Generate comprehensive SCA report"""
# Count vulnerabilities by severity
vuln_counts = {s.value: 0 for s in SeverityLevel}
for dep in dependencies:
for vuln in dep.vulnerabilities:
vuln_counts[vuln.severity.value] += 1
# Count licenses
license_counts = {}
for dep in dependencies:
if dep.license:
license_id = dep.license.spdx_id
license_counts[license_id] = license_counts.get(license_id, 0) + 1
return SCAReport(
project_name="",
scan_timestamp=datetime.utcnow().isoformat(),
total_dependencies=len(dependencies),
direct_dependencies=sum(1 for d in dependencies if d.direct),
transitive_dependencies=sum(1 for d in dependencies if not d.direct),
vulnerabilities_by_severity=vuln_counts,
license_breakdown=license_counts,
dependencies=dependencies,
policy_violations=violations
)CI/CD Pipeline Integration
GitHub Actions SCA Workflow
# .github/workflows/sca-scan.yml
name: Software Composition Analysis

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 6 * * *'  # daily at 06:00 UTC

jobs:
  dependency-scan:
    runs-on: ubuntu-latest
    permissions:
      contents: read          # checkout
      security-events: write  # SARIF upload
    steps:
      - uses: actions/checkout@v4

      # --- npm / Node.js scanning ---
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
      - name: Install Dependencies
        run: npm ci
      - name: npm audit
        run: |
          npm audit --json > npm-audit.json || true
          node scripts/parse-npm-audit.js npm-audit.json

      # --- Snyk scanning ---
      - name: Run Snyk to check for vulnerabilities
        uses: snyk/actions/node@master
        continue-on-error: true
        env:
          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
        with:
          args: --severity-threshold=high --json-file-output=snyk-results.json

      # --- OWASP Dependency Check ---
      - name: OWASP Dependency Check
        uses: dependency-check/Dependency-Check_Action@main
        with:
          project: 'my-project'
          path: '.'
          format: 'JSON'
          out: 'dependency-check-report'
          args: >
            --suppression suppression.xml
            --failOnCVSS 7

      # --- Trivy for container/filesystem dependencies ---
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          scan-type: 'fs'
          scan-ref: '.'
          format: 'sarif'
          output: 'trivy-results.sarif'

      # Publish findings to the GitHub Security tab
      - name: Upload Trivy results to GitHub Security
        uses: github/codeql-action/upload-sarif@v2
        with:
          sarif_file: 'trivy-results.sarif'

      # --- License scanning ---
      - name: License Compliance Check
        run: |
          npx license-checker --json --out licenses.json
          node scripts/check-license-compliance.js licenses.json

      # --- SBOM generation ---
      - name: Generate SBOM
        run: |
          npx @cyclonedx/cyclonedx-npm --output-file sbom.json
      - name: Upload SBOM
        uses: actions/upload-artifact@v4
        with:
          name: sbom
          path: sbom.json

  python-scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies
        run: |
          pip install safety pip-audit
      - name: Safety Check
        run: |
          pip freeze > requirements-lock.txt
          safety check -r requirements-lock.txt --json > safety-report.json || true
      - name: pip-audit
        run: |
          pip-audit --format json --output pip-audit.json || true
      - name: Upload Python scan results
        uses: actions/upload-artifact@v4
        with:
          name: python-security-reports
          path: |
            safety-report.json
            pip-audit.json
License Compliance Checker
// scripts/check-license-compliance.js
const fs = require('fs');
// Policy configuration
// Policy configuration: classify SPDX license IDs into buckets.
const policy = {
  // Safe to use without review
  allowed: [
    'MIT',
    'Apache-2.0',
    'BSD-2-Clause',
    'BSD-3-Clause',
    'ISC',
    'CC0-1.0',
    '0BSD',
    'Unlicense'
  ],
  // Usable, but legal review is required first
  conditional: [
    'LGPL-2.1',
    'LGPL-3.0',
    'MPL-2.0'
  ],
  // Never allowed in this codebase
  blocked: [
    'GPL-2.0',
    'GPL-3.0',
    'AGPL-3.0',
    'SSPL-1.0',
    'BUSL-1.1'
  ],
  // How to treat licenses not in any list: 'fail', 'warn', or 'allow'
  unknownAction: 'warn'
};
function checkLicenses(licensesFile) {
const licenses = JSON.parse(fs.readFileSync(licensesFile, 'utf8'));
const results = {
passed: [],
warnings: [],
failures: [],
unknown: []
};
for (const [pkg, info] of Object.entries(licenses)) {
const license = info.licenses;
if (policy.blocked.includes(license)) {
results.failures.push({
package: pkg,
license,
reason: 'License is blocked by policy'
});
} else if (policy.conditional.includes(license)) {
results.warnings.push({
package: pkg,
license,
reason: 'License requires legal review'
});
} else if (policy.allowed.includes(license)) {
results.passed.push({ package: pkg, license });
} else {
results.unknown.push({
package: pkg,
license,
reason: 'Unknown license - requires review'
});
}
}
// Output results
console.log('\n=== License Compliance Report ===\n');
console.log(`✅ Passed: ${results.passed.length} packages`);
console.log(`⚠️ Warnings: ${results.warnings.length} packages`);
console.log(`❌ Failures: ${results.failures.length} packages`);
console.log(`❓ Unknown: ${results.unknown.length} packages`);
if (results.failures.length > 0) {
console.log('\n--- Blocked Licenses ---');
results.failures.forEach(f => {
console.log(` ❌ ${f.package}: ${f.license}`);
});
}
if (results.warnings.length > 0) {
console.log('\n--- Licenses Requiring Review ---');
results.warnings.forEach(w => {
console.log(` ⚠️ ${w.package}: ${w.license}`);
});
}
// Exit code based on failures
const exitCode = results.failures.length > 0 ? 1 : 0;
process.exit(exitCode);
}
// Run
const licensesFile = process.argv[2] || 'licenses.json';
checkLicenses(licensesFile);SBOM Generation and Management
CycloneDX SBOM Generator
# sbom_generator.py
import json
import hashlib
from datetime import datetime
from typing import List, Dict
import uuid
class SBOMGenerator:
    """Generate a CycloneDX 1.5 software bill of materials (SBOM)."""

    def __init__(self, project_name: str, version: str):
        self.project_name = project_name
        self.version = version

    def generate(self, dependencies: List[Dict]) -> Dict:
        """Generate a CycloneDX-format SBOM.

        Each entry in *dependencies* is a dict with at least ``name`` and
        ``version``; optional keys: ``ecosystem``, ``direct``, ``license``,
        ``integrity``, ``repository``, ``dependencies``.
        """
        sbom = {
            "bomFormat": "CycloneDX",
            "specVersion": "1.5",
            "serialNumber": f"urn:uuid:{uuid.uuid4()}",
            "version": 1,
            "metadata": {
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "tools": [{
                    "vendor": "Custom",
                    "name": "sbom-generator",
                    "version": "1.0.0"
                }],
                "component": {
                    "type": "application",
                    "name": self.project_name,
                    "version": self.version,
                    "bom-ref": f"pkg:{self.project_name}@{self.version}"
                }
            },
            "components": [],
            "dependencies": []
        }

        for dep in dependencies:
            component = self._create_component(dep)
            sbom["components"].append(component)
            # Children may be full dependency dicts or plain name/ref strings
            # (e.g. Dependency.dependencies is List[str] upstream). Only dicts
            # can be turned into PURLs; strings are passed through as-is.
            # The original called _create_purl on every child, which raised
            # AttributeError for string children.
            sbom["dependencies"].append({
                "ref": component["bom-ref"],
                "dependsOn": [
                    self._create_purl(d) if isinstance(d, dict) else str(d)
                    for d in dep.get("dependencies", [])
                ]
            })
        return sbom

    def _create_component(self, dep: Dict) -> Dict:
        """Create a CycloneDX component entry for one dependency."""
        purl = self._create_purl(dep)
        component = {
            "type": "library",
            "bom-ref": purl,
            "name": dep["name"],
            "version": dep["version"],
            "purl": purl,
            # Direct dependencies are required; transitive ones optional
            "scope": "required" if dep.get("direct", True) else "optional"
        }
        # License (expects an SPDX id string)
        if dep.get("license"):
            component["licenses"] = [{
                "license": {
                    "id": dep["license"]
                }
            }]
        # Integrity hash (npm-style "sha512-..." SRI string)
        if dep.get("integrity"):
            component["hashes"] = [{
                "alg": "SHA-512",
                "content": dep["integrity"].replace("sha512-", "")
            }]
        # Source repository reference
        if dep.get("repository"):
            component["externalReferences"] = [{
                "type": "vcs",
                "url": dep["repository"]
            }]
        return component

    def _create_purl(self, dep: Dict) -> str:
        """Create a Package URL (PURL), e.g. ``pkg:npm/lodash@4.17.21``."""
        ecosystem = dep.get("ecosystem", "npm")
        name = dep["name"]
        version = dep["version"]
        if name.startswith("@"):
            # Scoped npm package: split only on the first slash so names
            # containing further slashes don't raise ValueError.
            namespace, pkg_name = name[1:].split("/", 1)
            return f"pkg:{ecosystem}/{namespace}/{pkg_name}@{version}"
        return f"pkg:{ecosystem}/{name}@{version}"

    def export_json(self, sbom: Dict, filepath: str):
        """Export the SBOM to a JSON file."""
        with open(filepath, 'w') as f:
            json.dump(sbom, f, indent=2)

    def export_xml(self, sbom: Dict, filepath: str):
        """Export the SBOM to CycloneDX XML (subset: metadata + components)."""
        import xml.etree.ElementTree as ET
        root = ET.Element("bom")
        root.set("xmlns", "http://cyclonedx.org/schema/bom/1.5")
        root.set("version", str(sbom["version"]))
        root.set("serialNumber", sbom["serialNumber"])
        # Metadata (timestamp only)
        metadata = ET.SubElement(root, "metadata")
        timestamp = ET.SubElement(metadata, "timestamp")
        timestamp.text = sbom["metadata"]["timestamp"]
        # Components
        components = ET.SubElement(root, "components")
        for comp in sbom["components"]:
            component = ET.SubElement(components, "component")
            component.set("type", comp["type"])
            component.set("bom-ref", comp["bom-ref"])
            name = ET.SubElement(component, "name")
            name.text = comp["name"]
            version = ET.SubElement(component, "version")
            version.text = comp["version"]
            purl = ET.SubElement(component, "purl")
            purl.text = comp["purl"]
        tree = ET.ElementTree(root)
tree.write(filepath, encoding="utf-8", xml_declaration=True)Vulnerability Remediation
Automated Dependency Updates
# .github/dependabot.yml
version: 2
updates:
  # npm dependencies
  - package-ecosystem: "npm"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "monday"
      time: "06:00"
    open-pull-requests-limit: 10
    groups:
      # Batch minor/patch updates into grouped PRs
      production-dependencies:
        dependency-type: "production"
        update-types:
          - "minor"
          - "patch"
      development-dependencies:
        dependency-type: "development"
        update-types:
          - "minor"
          - "patch"
    # Majors are handled manually; security updates always get their own PRs
    ignore:
      - dependency-name: "*"
        update-types: ["version-update:semver-major"]
    labels:
      - "dependencies"
      - "automated"
    commit-message:
      prefix: "deps"
      include: "scope"

  # Python dependencies
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 5

  # Docker base images
  - package-ecosystem: "docker"
    directory: "/"
    schedule:
      interval: "weekly"

  # GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
Vulnerability Fix Automation
// scripts/auto-fix-vulnerabilities.js
const { execSync } = require('child_process');
const fs = require('fs');
async function autoFixVulnerabilities() {
console.log('🔍 Scanning for vulnerabilities...\n');
// Run npm audit
let auditResult;
try {
auditResult = JSON.parse(
execSync('npm audit --json', { encoding: 'utf8' })
);
} catch (e) {
auditResult = JSON.parse(e.stdout);
}
const vulnerabilities = auditResult.vulnerabilities || {};
const fixable = [];
const manual = [];
for (const [pkg, data] of Object.entries(vulnerabilities)) {
if (data.fixAvailable) {
if (typeof data.fixAvailable === 'boolean') {
fixable.push(pkg);
} else if (!data.fixAvailable.isSemVerMajor) {
// Non-breaking fix available
fixable.push(pkg);
} else {
// Major version bump required
manual.push({
package: pkg,
severity: data.severity,
currentVersion: data.range,
fixVersion: data.fixAvailable.version,
breaking: true
});
}
} else {
manual.push({
package: pkg,
severity: data.severity,
reason: 'No fix available'
});
}
}
// Apply automatic fixes
if (fixable.length > 0) {
console.log(`\n🔧 Attempting to fix ${fixable.length} vulnerabilities...\n`);
try {
execSync('npm audit fix', { stdio: 'inherit' });
console.log('\n✅ Automatic fixes applied successfully\n');
} catch (e) {
console.error('⚠️ Some fixes could not be applied automatically\n');
}
}
// Report manual fixes needed
if (manual.length > 0) {
console.log('\n📋 Manual remediation required:\n');
manual.forEach(item => {
const emoji = {
critical: '🔴',
high: '🟠',
moderate: '🟡',
low: '🟢'
}[item.severity] || '⚪';
console.log(`${emoji} ${item.package}`);
console.log(` Severity: ${item.severity}`);
if (item.breaking) {
console.log(` Fix: Upgrade to ${item.fixVersion} (BREAKING)`);
} else if (item.reason) {
console.log(` Note: ${item.reason}`);
}
console.log('');
});
}
// Generate report
const report = {
timestamp: new Date().toISOString(),
summary: {
total: Object.keys(vulnerabilities).length,
autoFixed: fixable.length,
manualRequired: manual.length
},
manualFixes: manual
};
fs.writeFileSync(
'vulnerability-fix-report.json',
JSON.stringify(report, null, 2)
);
console.log('📝 Report saved to vulnerability-fix-report.json');
// Exit with error if critical/high vulnerabilities remain
const criticalRemaining = manual.filter(
m => m.severity === 'critical' || m.severity === 'high'
);
if (criticalRemaining.length > 0) {
console.error(`\n❌ ${criticalRemaining.length} critical/high vulnerabilities require manual fixes`);
process.exit(1);
}
}
autoFixVulnerabilities().catch(console.error);Policy Enforcement
SCA Policy Configuration
# sca-policy.yaml
version: "1.0"

vulnerabilities:
  # Severities that fail the build outright
  fail_on:
    - critical
    - high
  # Time-boxed exceptions; each needs a justification, expiry, and approver
  exceptions:
    - cve: "CVE-2023-12345"
      package: "example-package"
      reason: "No fix available, mitigated by WAF rules"
      expires: "2025-06-01"
      approved_by: "security-team"
  # Automatic remediation settings
  auto_fix:
    enabled: true
    max_severity: "moderate"  # only auto-fix up to moderate
    exclude_breaking: true

licenses:
  # Approved licenses
  allowed:
    - MIT
    - Apache-2.0
    - BSD-2-Clause
    - BSD-3-Clause
    - ISC
    - CC0-1.0
    - Unlicense
  # Conditionally approved (require review)
  conditional:
    - LGPL-2.1
    - LGPL-3.0
    - MPL-2.0
  # Blocked licenses
  blocked:
    - GPL-2.0
    - GPL-3.0
    - AGPL-3.0
    - SSPL-1.0
  # Action for unknown licenses: fail, warn, or allow
  unknown_action: warn

packages:
  # Banned outright (known security incidents or abandoned)
  banned:
    - event-stream   # compromised in 2018
    - left-pad       # historic incident
    - ua-parser-js   # supply chain attack
  # Deprecated packages with migration deadlines
  deprecated:
    - request:
        replacement: "axios or node-fetch"
        deadline: "2025-01-01"
    - moment:
        replacement: "date-fns or dayjs"
        deadline: "2025-06-01"
  # Minimum version pins
  pinned:
    - lodash: ">=4.17.21"  # security fixes

age_policy:
  # Warn/fail when a package has gone unmaintained for this long
  warn_if_stale_months: 24
  fail_if_stale_months: 36

maintainer_policy:
  # Warn if the package has fewer maintainers than this
  min_maintainers: 2
  # Warn if the main maintainer has been inactive this long
  maintainer_active_months: 12
Summary
Effective SCA implementation requires:
- Automated scanning in CI/CD pipelines
- License compliance enforcement
- SBOM generation for supply chain transparency
- Vulnerability remediation workflows
- Policy-as-code for consistent enforcement
Integrate these practices to secure your open source supply chain while maintaining development velocity. Regular reviews of policies and exceptions ensure continued alignment with security requirements.
Is your AI system compliant with the EU AI Act? Free risk assessment - find out in 2 minutes →