Software Composition Analysis (SCA) identifies known vulnerabilities and license risks in open-source dependencies before they become security incidents. This guide covers implementing comprehensive SCA in your development pipeline, from dependency parsing and vulnerability matching to SBOM generation, CI/CD integration, and automated remediation.
SCA Framework Architecture
Build a comprehensive dependency analysis system:
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Set
from enum import Enum
from datetime import datetime
import json
import re
from abc import ABC, abstractmethod
class PackageManager(Enum):
NPM = "npm"
PYPI = "pypi"
MAVEN = "maven"
NUGET = "nuget"
GO = "go"
CARGO = "cargo"
class LicenseRisk(Enum):
HIGH = "high"
MEDIUM = "medium"
LOW = "low"
UNKNOWN = "unknown"
class VulnerabilitySeverity(Enum):
CRITICAL = 4
HIGH = 3
MEDIUM = 2
LOW = 1
INFO = 0
@dataclass
class Vulnerability:
cve_id: str
severity: VulnerabilitySeverity
cvss_score: float
description: str
fixed_version: Optional[str]
published_date: datetime
references: List[str] = field(default_factory=list)
@dataclass
class License:
spdx_id: str
name: str
risk_level: LicenseRisk
copyleft: bool
commercial_use: bool
@dataclass
class Dependency:
name: str
version: str
package_manager: PackageManager
is_direct: bool
license: Optional[License]
vulnerabilities: List[Vulnerability] = field(default_factory=list)
transitive_deps: List[str] = field(default_factory=list)
latest_version: Optional[str] = None
deprecated: bool = False
@dataclass
class SCAReport:
scan_id: str
project_name: str
scanned_at: datetime
dependencies: List[Dependency]
total_vulnerabilities: int
critical_count: int
high_count: int
license_violations: List[Dict]
sbom: Dict
class DependencyParser(ABC):
@abstractmethod
def parse(self, content: str) -> List[Dependency]:
pass
class NpmParser(DependencyParser):
def parse(self, content: str) -> List[Dependency]:
"""Parse package-lock.json or npm ls output."""
dependencies = []
try:
data = json.loads(content)
# Handle package-lock.json format
if 'packages' in data:
for path, pkg_info in data['packages'].items():
if not path:
continue
name = path.split('node_modules/')[-1]
version = pkg_info.get('version', 'unknown')
is_direct = path.count('node_modules/') == 1
dependencies.append(Dependency(
name=name,
version=version,
package_manager=PackageManager.NPM,
is_direct=is_direct,
license=self._parse_license(pkg_info.get('license')),
transitive_deps=list(pkg_info.get('dependencies', {}).keys())
))
# Handle package.json format
            elif 'dependencies' in data or 'devDependencies' in data:
for name, version in data.get('dependencies', {}).items():
dependencies.append(Dependency(
name=name,
version=version.lstrip('^~'),
package_manager=PackageManager.NPM,
is_direct=True,
license=None
))
for name, version in data.get('devDependencies', {}).items():
dependencies.append(Dependency(
name=name,
version=version.lstrip('^~'),
package_manager=PackageManager.NPM,
is_direct=True,
license=None
))
except json.JSONDecodeError:
pass
return dependencies
def _parse_license(self, license_str: Optional[str]) -> Optional[License]:
if not license_str:
return None
license_map = {
'MIT': License('MIT', 'MIT License', LicenseRisk.LOW, False, True),
'Apache-2.0': License('Apache-2.0', 'Apache License 2.0', LicenseRisk.LOW, False, True),
'GPL-3.0': License('GPL-3.0', 'GNU GPL v3', LicenseRisk.HIGH, True, True),
'BSD-3-Clause': License('BSD-3-Clause', 'BSD 3-Clause', LicenseRisk.LOW, False, True),
'ISC': License('ISC', 'ISC License', LicenseRisk.LOW, False, True)
}
return license_map.get(license_str, License(license_str, license_str, LicenseRisk.UNKNOWN, False, True))
class PythonParser(DependencyParser):
def parse(self, content: str) -> List[Dependency]:
"""Parse requirements.txt or Pipfile.lock."""
dependencies = []
# Try parsing as Pipfile.lock (JSON)
try:
data = json.loads(content)
for section in ['default', 'develop']:
for name, info in data.get(section, {}).items():
version = info.get('version', '').lstrip('==')
dependencies.append(Dependency(
name=name,
version=version,
package_manager=PackageManager.PYPI,
is_direct=True,
license=None
))
return dependencies
except json.JSONDecodeError:
pass
# Parse as requirements.txt
for line in content.strip().split('\n'):
line = line.strip()
if not line or line.startswith('#') or line.startswith('-'):
continue
            # Strip inline comments/markers, then match name, optional extras, optional version spec
            line = line.split(';')[0].split('#')[0].strip()
            match = re.match(r'^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(\[[^\]]*\])?\s*(?:[=<>!~]+\s*(.+))?$', line)
            if match:
                name = match.group(1)
                version = (match.group(3) or 'latest').strip()
dependencies.append(Dependency(
name=name,
version=version,
package_manager=PackageManager.PYPI,
is_direct=True,
license=None
))
return dependencies
class SoftwareCompositionAnalyzer:
    def __init__(self, vulnerability_db_path: Optional[str] = None):
self.parsers = {
PackageManager.NPM: NpmParser(),
PackageManager.PYPI: PythonParser()
}
self.vulnerability_db = self._load_vulnerability_db(vulnerability_db_path)
self.license_policy = self._load_license_policy()
    def _load_vulnerability_db(self, path: Optional[str]) -> Dict:
"""Load vulnerability database (simplified - would use OSV/NVD in production)."""
return {
'lodash': [
Vulnerability(
cve_id='CVE-2021-23337',
severity=VulnerabilitySeverity.HIGH,
cvss_score=7.2,
description='Command Injection in lodash',
fixed_version='4.17.21',
published_date=datetime(2021, 2, 15),
references=['https://nvd.nist.gov/vuln/detail/CVE-2021-23337']
)
],
'requests': [
Vulnerability(
cve_id='CVE-2023-32681',
severity=VulnerabilitySeverity.MEDIUM,
cvss_score=5.9,
description='Unintended leak of Proxy-Authorization header',
fixed_version='2.31.0',
published_date=datetime(2023, 5, 22),
references=['https://nvd.nist.gov/vuln/detail/CVE-2023-32681']
)
]
}
def _load_license_policy(self) -> Dict:
"""Load organization license policy."""
return {
'allowed': ['MIT', 'Apache-2.0', 'BSD-2-Clause', 'BSD-3-Clause', 'ISC', 'Unlicense'],
'restricted': ['GPL-2.0', 'GPL-3.0', 'AGPL-3.0', 'LGPL-2.1', 'LGPL-3.0'],
'banned': ['SSPL-1.0', 'Commons Clause']
}
def analyze(self, manifest_content: str, package_manager: PackageManager, project_name: str) -> SCAReport:
"""Perform full SCA analysis."""
parser = self.parsers.get(package_manager)
if not parser:
raise ValueError(f"Unsupported package manager: {package_manager}")
dependencies = parser.parse(manifest_content)
# Enrich with vulnerability data
for dep in dependencies:
dep.vulnerabilities = self._check_vulnerabilities(dep)
# Check license compliance
license_violations = self._check_license_compliance(dependencies)
# Generate SBOM
sbom = self._generate_sbom(dependencies, project_name)
# Count vulnerabilities
critical = sum(1 for d in dependencies for v in d.vulnerabilities if v.severity == VulnerabilitySeverity.CRITICAL)
high = sum(1 for d in dependencies for v in d.vulnerabilities if v.severity == VulnerabilitySeverity.HIGH)
total = sum(len(d.vulnerabilities) for d in dependencies)
return SCAReport(
scan_id=f"sca_{datetime.utcnow().strftime('%Y%m%d%H%M%S')}",
project_name=project_name,
scanned_at=datetime.utcnow(),
dependencies=dependencies,
total_vulnerabilities=total,
critical_count=critical,
high_count=high,
license_violations=license_violations,
sbom=sbom
)
def _check_vulnerabilities(self, dep: Dependency) -> List[Vulnerability]:
"""Check dependency against vulnerability database."""
vulns = []
db_vulns = self.vulnerability_db.get(dep.name.lower(), [])
for vuln in db_vulns:
if vuln.fixed_version:
if self._version_lt(dep.version, vuln.fixed_version):
vulns.append(vuln)
else:
vulns.append(vuln)
return vulns
def _version_lt(self, v1: str, v2: str) -> bool:
"""Compare versions (simplified)."""
def normalize(v):
return [int(x) for x in re.sub(r'[^\d.]', '', v).split('.') if x]
try:
return normalize(v1) < normalize(v2)
except (ValueError, IndexError):
return True
def _check_license_compliance(self, dependencies: List[Dependency]) -> List[Dict]:
"""Check dependencies against license policy."""
violations = []
for dep in dependencies:
if not dep.license:
violations.append({
'dependency': dep.name,
'version': dep.version,
'issue': 'Unknown license',
'severity': 'medium'
})
continue
license_id = dep.license.spdx_id
if license_id in self.license_policy['banned']:
violations.append({
'dependency': dep.name,
'version': dep.version,
'license': license_id,
'issue': 'Banned license',
'severity': 'critical'
})
elif license_id in self.license_policy['restricted']:
violations.append({
'dependency': dep.name,
'version': dep.version,
'license': license_id,
'issue': 'Restricted license - requires legal review',
'severity': 'high'
})
return violations
def _generate_sbom(self, dependencies: List[Dependency], project_name: str) -> Dict:
"""Generate CycloneDX SBOM."""
return {
"bomFormat": "CycloneDX",
"specVersion": "1.4",
"version": 1,
"metadata": {
"timestamp": datetime.utcnow().isoformat(),
"component": {
"type": "application",
"name": project_name
}
},
"components": [
{
"type": "library",
"name": dep.name,
"version": dep.version,
"purl": f"pkg:{dep.package_manager.value}/{dep.name}@{dep.version}",
"licenses": [{"license": {"id": dep.license.spdx_id}}] if dep.license else []
}
for dep in dependencies
]
        }
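With the analyzer defined, here is a minimal usage sketch; the lockfile path and project name are illustrative:
# Example usage: scan an npm project and print a summary (illustrative names)
if __name__ == "__main__":
    analyzer = SoftwareCompositionAnalyzer()
    with open("package-lock.json") as f:
        report = analyzer.analyze(f.read(), PackageManager.NPM, "example-web-app")

    print(f"Scan {report.scan_id}: {len(report.dependencies)} dependencies")
    print(f"Vulnerabilities: {report.total_vulnerabilities} "
          f"(critical: {report.critical_count}, high: {report.high_count})")
    for violation in report.license_violations:
        print(f"License issue: {violation['dependency']} - {violation['issue']}")

    # Persist the CycloneDX SBOM for downstream tooling
    with open("sbom.json", "w") as f:
        json.dump(report.sbom, f, indent=2)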
CI/CD Integration
Integrate SCA scanning into your CI/CD pipelines:
# .github/workflows/sca-scan.yml
name: Software Composition Analysis
on:
push:
branches: [main, develop]
pull_request:
branches: [main]
schedule:
- cron: '0 6 * * *'
jobs:
dependency-scan:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Run Snyk Security Scan
uses: snyk/actions/node@master
env:
SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
with:
args: --severity-threshold=high --json-file-output=snyk-results.json
- name: Run OWASP Dependency Check
uses: dependency-check/Dependency-Check_Action@main
with:
project: '${{ github.repository }}'
path: '.'
format: 'JSON'
out: 'dependency-check-report'
- name: Run Trivy Filesystem Scan
uses: aquasecurity/trivy-action@master
with:
scan-type: 'fs'
scan-ref: '.'
format: 'json'
output: 'trivy-results.json'
severity: 'CRITICAL,HIGH'
- name: Generate SBOM
run: |
npm install -g @cyclonedx/cdxgen
cdxgen -o sbom.json
- name: Upload SBOM to Dependency Track
run: |
curl -X POST "${{ secrets.DEPENDENCY_TRACK_URL }}/api/v1/bom" \
-H "X-Api-Key: ${{ secrets.DEPENDENCY_TRACK_API_KEY }}" \
-H "Content-Type: multipart/form-data" \
-F "project=${{ github.repository }}" \
-F "bom=@sbom.json"
- name: Check Vulnerability Thresholds
run: |
python scripts/check_sca_thresholds.py \
--snyk snyk-results.json \
--trivy trivy-results.json \
--max-critical 0 \
--max-high 5
- name: Upload Results
uses: actions/upload-artifact@v4
with:
name: sca-results
path: |
snyk-results.json
trivy-results.json
            sbom.json
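The threshold gate in the workflow above is a small helper script. Here is a minimal sketch of scripts/check_sca_thresholds.py, assuming the common Snyk layout (top-level "vulnerabilities" with lowercase severities) and Trivy layout ("Results" containing "Vulnerabilities" with capitalized Severity); field names may need adjusting for your CLI versions:
# scripts/check_sca_thresholds.py - fail the build if severity counts exceed limits
import argparse
import json
import sys

def count_severities(path, severities):
    """Count findings per severity; tolerate missing files or fields."""
    counts = {s: 0 for s in severities}
    try:
        with open(path) as f:
            data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return counts
    # Snyk: top-level "vulnerabilities"; Trivy: "Results" -> "Vulnerabilities"
    findings = list(data.get("vulnerabilities", []))
    for result in data.get("Results", []):
        findings.extend(result.get("Vulnerabilities") or [])
    for finding in findings:
        sev = str(finding.get("severity") or finding.get("Severity", "")).lower()
        if sev in counts:
            counts[sev] += 1
    return counts

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--snyk")
    parser.add_argument("--trivy")
    parser.add_argument("--max-critical", type=int, default=0)
    parser.add_argument("--max-high", type=int, default=5)
    args = parser.parse_args()

    totals = {"critical": 0, "high": 0}
    for path in filter(None, [args.snyk, args.trivy]):
        counts = count_severities(path, ["critical", "high"])
        totals["critical"] += counts["critical"]
        totals["high"] += counts["high"]

    print(f"Critical: {totals['critical']} (max {args.max_critical}), "
          f"High: {totals['high']} (max {args.max_high})")
    if totals["critical"] > args.max_critical or totals["high"] > args.max_high:
        sys.exit(1)

if __name__ == "__main__":
    main()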
Automated Remediation
Automate dependency updates for vulnerable packages:
import re
from dataclasses import dataclass
from typing import Dict, List

# PackageManager and SCAReport are reused from the analyzer module above
@dataclass
class UpdateRecommendation:
package: str
current_version: str
recommended_version: str
vulnerabilities_fixed: List[str]
breaking_change_risk: str
auto_update_safe: bool
class DependencyRemediator:
def __init__(self, package_manager: PackageManager):
self.package_manager = package_manager
def generate_recommendations(self, report: SCAReport) -> List[UpdateRecommendation]:
"""Generate update recommendations for vulnerable dependencies."""
recommendations = []
for dep in report.dependencies:
if not dep.vulnerabilities:
continue
# Find the minimum version that fixes all vulnerabilities
fixed_versions = [v.fixed_version for v in dep.vulnerabilities if v.fixed_version]
if not fixed_versions:
continue
recommended = max(fixed_versions, key=lambda v: self._parse_version(v))
# Assess breaking change risk
risk = self._assess_breaking_change_risk(dep.version, recommended)
recommendations.append(UpdateRecommendation(
package=dep.name,
current_version=dep.version,
recommended_version=recommended,
vulnerabilities_fixed=[v.cve_id for v in dep.vulnerabilities],
breaking_change_risk=risk,
auto_update_safe=risk == 'low'
))
return recommendations
def _parse_version(self, version: str) -> tuple:
"""Parse version string into comparable tuple."""
parts = re.findall(r'\d+', version)
return tuple(int(p) for p in parts)
def _assess_breaking_change_risk(self, current: str, target: str) -> str:
"""Assess risk of breaking changes between versions."""
current_parts = self._parse_version(current)
target_parts = self._parse_version(target)
if not current_parts or not target_parts:
return 'unknown'
# Major version change
if target_parts[0] > current_parts[0]:
return 'high'
# Minor version change
if len(target_parts) > 1 and len(current_parts) > 1:
if target_parts[1] > current_parts[1]:
return 'medium'
return 'low'
def create_update_pr(self, recommendations: List[UpdateRecommendation]) -> Dict:
"""Create PR with dependency updates."""
safe_updates = [r for r in recommendations if r.auto_update_safe]
if not safe_updates:
return {'status': 'no_safe_updates', 'updates': []}
# Generate update commands based on package manager
updates = []
for rec in safe_updates:
if self.package_manager == PackageManager.NPM:
cmd = f"npm install {rec.package}@{rec.recommended_version}"
elif self.package_manager == PackageManager.PYPI:
cmd = f"pip install {rec.package}=={rec.recommended_version}"
else:
continue
updates.append({
'package': rec.package,
'command': cmd,
'from': rec.current_version,
'to': rec.recommended_version,
'fixes': rec.vulnerabilities_fixed
})
return {
'status': 'updates_available',
'updates': updates,
'pr_title': f"Security: Update {len(updates)} vulnerable dependencies",
'pr_body': self._generate_pr_body(updates)
}
def _generate_pr_body(self, updates: List[Dict]) -> str:
"""Generate PR description."""
body = "## Security Dependency Updates\n\n"
body += "This PR updates dependencies to fix known vulnerabilities.\n\n"
body += "### Updates\n\n"
body += "| Package | From | To | CVEs Fixed |\n"
body += "|---------|------|----|-----------|\n"
for update in updates:
cves = ', '.join(update['fixes'][:3])
if len(update['fixes']) > 3:
cves += f" +{len(update['fixes']) - 3} more"
body += f"| {update['package']} | {update['from']} | {update['to']} | {cves} |\n"
body += "\n### Testing\n\n"
body += "- [ ] All tests pass\n"
body += "- [ ] No breaking changes detected\n"
body += "- [ ] Security scan passes\n"
        return body
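A short end-to-end sketch ties the analyzer and remediator together, assuming both classes above are in scope; the manifest path and project name are illustrative:
# Example: turn scan findings into an update PR payload (illustrative names)
analyzer = SoftwareCompositionAnalyzer()
with open("requirements.txt") as f:
    report = analyzer.analyze(f.read(), PackageManager.PYPI, "example-api")

remediator = DependencyRemediator(PackageManager.PYPI)
recommendations = remediator.generate_recommendations(report)
pr = remediator.create_update_pr(recommendations)

if pr["status"] == "updates_available":
    print(pr["pr_title"])
    for update in pr["updates"]:
        print(f"  {update['command']}  # fixes {', '.join(update['fixes'])}")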
Conclusion
Software Composition Analysis is essential for securing the software supply chain. Implement comprehensive dependency scanning, vulnerability detection, and license compliance checking; automate SBOM generation; and integrate SCA into CI/CD pipelines for continuous security monitoring. Regular updates and automated remediation keep dependencies secure.