Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Collect data from Anchore NVD overrides #1773

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions SOURCES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
.. _anchore:

Anchore NVD Overrides
---------------------
Anchore provides overrides for NVD data to improve accuracy. This importer fetches data from their
`nvd-data-overrides <https://github.com/anchore/nvd-data-overrides>`_ repository.

+----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+
|Importer Name | Data Source |Ecosystems Covered |
+================+======================================================================================================+====================================================+
Expand Down
4 changes: 3 additions & 1 deletion vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from vulnerabilities.importers import apache_httpd
from vulnerabilities.importers import apache_kafka
from vulnerabilities.importers import apache_tomcat
Expand Down Expand Up @@ -42,6 +41,8 @@
from vulnerabilities.pipelines import nvd_importer
from vulnerabilities.pipelines import pypa_importer
from vulnerabilities.pipelines import pysec_importer
from vulnerabilities.pipelines.anchore_importer import AnchoreImporterPipeline
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please follow the import style, take example from above. Thanks!



IMPORTERS_REGISTRY = [
openssl.OpensslImporter,
Expand Down Expand Up @@ -78,6 +79,7 @@
nvd_importer.NVDImporterPipeline,
pysec_importer.PyPIImporterPipeline,
alpine_linux_importer.AlpineLinuxImporterPipeline,
AnchoreImporterPipeline,
]

IMPORTERS_REGISTRY = {
Expand Down
77 changes: 77 additions & 0 deletions vulnerabilities/pipelines/anchore_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from datetime import datetime
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add the license header.

from typing import Iterable
import requests
import yaml
from packageurl import PackageURL
from univers.versions import SemverVersion

from vulnerabilities.importer import AdvisoryData, AffectedPackage, Reference
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline


class AnchoreImporterPipeline(VulnerableCodeBaseImporterPipeline):
    """
    Collect advisories from Anchore's NVD overrides.

    The overrides document is downloaded once per pipeline instance, each
    entry is converted to an ``AdvisoryData`` and entries missing the required
    fields are skipped.
    """

    pipeline_id = "anchore_importer"
    root_url = "https://github.com/anchore/nvd-data-overrides"
    license_url = "https://github.com/anchore/nvd-data-overrides/blob/main/LICENSE"
    spdx_license_expression = "CC0-1.0"  # License of Anchore's data
    importer_name = "Anchore NVD Overrides Importer"

    # Seconds to wait for the remote server before giving up. Without a
    # timeout ``requests`` may block forever on an unresponsive host.
    request_timeout = 30

    @classmethod
    def steps(cls):
        """Return the ordered pipeline steps (both are inherited from the base class)."""
        return (
            cls.collect_and_store_advisories,
            cls.import_new_advisories,
        )

    def advisories_count(self) -> int:
        """
        Return the number of raw entries in the overrides document.

        Entries that ``parse_advisory_data`` later rejects are still counted,
        so this is an upper bound on the advisories actually yielded.
        """
        return len(self._load_entries())

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """Yield one ``AdvisoryData`` per valid entry, skipping incomplete ones."""
        for entry in self._load_entries():
            advisory = self.parse_advisory_data(entry)
            # parse_advisory_data returns None for incomplete entries; never
            # hand None to the rest of the pipeline.
            if advisory is not None:
                yield advisory

    def _load_entries(self):
        """
        Return the fetched entries, downloading them at most once per instance.

        ``advisories_count`` and ``collect_advisories`` both need the data;
        caching avoids fetching the same document twice in one run. An empty
        YAML document (``None``) is normalized to an empty list.
        """
        entries = getattr(self, "_anchore_entries", None)
        if entries is None:
            entries = self.fetch_data() or []
            self._anchore_entries = entries
        return entries

    def fetch_data(self):
        """
        Fetch Anchore's NVD overrides from their GitHub repository.

        Return the parsed YAML document. Raise ``requests.HTTPError`` on a
        non-2xx response and a ``requests`` timeout error if the server does
        not answer within ``request_timeout`` seconds.
        """
        # NOTE(review): confirm this file exists upstream and that its schema
        # matches the keys read in parse_advisory_data -- TODO verify.
        url = "https://raw.githubusercontent.com/anchore/nvd-data-overrides/main/overrides.yaml"
        response = requests.get(url, timeout=self.request_timeout)
        response.raise_for_status()
        # safe_load only builds plain Python objects, never arbitrary ones.
        return yaml.safe_load(response.text)

    @staticmethod
    def _parse_published_date(value):
        """
        Convert a ``published_date`` value to a timezone-aware ``datetime``.

        Accept a ``YYYY-MM-DD`` string, or the ``date``/``datetime`` objects
        that the YAML loader produces for unquoted timestamps. Return ``None``
        for a missing or empty value. Raise ``ValueError`` for a string that
        does not match ``YYYY-MM-DD``.
        """
        from datetime import date
        from datetime import timezone

        if not value:
            return None

        # datetime is a subclass of date, so it must be checked first.
        if isinstance(value, datetime):
            parsed = value
        elif isinstance(value, date):
            parsed = datetime(value.year, value.month, value.day)
        else:
            parsed = datetime.strptime(value, "%Y-%m-%d")

        # The source carries no timezone; treat naive values as UTC so the
        # stored date is unambiguous.
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    def parse_advisory_data(self, raw_data) -> AdvisoryData:
        """
        Parse a single advisory entry into an AdvisoryData object.

        Return ``None`` when ``raw_data`` is not a mapping or lacks any of the
        required ``cve_id``, ``package_name`` and ``affected_versions`` keys.
        """
        required_keys = ("cve_id", "package_name", "affected_versions")
        if not isinstance(raw_data, dict):
            return None
        if any(key not in raw_data for key in required_keys):
            return None

        purl = PackageURL(type="generic", name=raw_data["package_name"])

        # NOTE(review): this is passed through as the raw value from the data
        # file; confirm whether AffectedPackage expects a univers VersionRange
        # object here rather than a string -- TODO verify.
        affected_version_range = raw_data["affected_versions"]

        raw_fixed_version = raw_data.get("fixed_version")
        fixed_version = SemverVersion(raw_fixed_version) if raw_fixed_version else None

        affected_package = AffectedPackage(
            package=purl,
            affected_version_range=affected_version_range,
            fixed_version=fixed_version,
        )

        # ``or`` also covers keys that are present but null in the YAML.
        reference_urls = raw_data.get("references") or []
        references = [Reference(url=url) for url in reference_urls if url]

        return AdvisoryData(
            aliases=[raw_data["cve_id"]],
            summary=raw_data.get("description") or "",
            affected_packages=[affected_package],
            references=references,
            date_published=self._parse_published_date(raw_data.get("published_date")),
        )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run make valid.