Skip to content

Commit ec59d7a

Browse files
committed
Add base importer pipeline
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 1e3afdc commit ec59d7a

6 files changed

Lines changed: 118 additions & 4 deletions

File tree

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
from vulnerabilities.importers import oss_fuzz
3131
from vulnerabilities.importers import postgresql
3232
from vulnerabilities.importers import project_kb_msr2019
33-
from vulnerabilities.importers import pypa
3433
from vulnerabilities.importers import pysec
3534
from vulnerabilities.importers import redhat
3635
from vulnerabilities.importers import retiredotnet
@@ -40,13 +39,13 @@
4039
from vulnerabilities.importers import ubuntu_usn
4140
from vulnerabilities.importers import vulnrichment
4241
from vulnerabilities.importers import xen
42+
from vulnerabilities.pipelines import pypa_importer
4343

4444
IMPORTERS_REGISTRY = [
4545
nvd.NVDImporter,
4646
github.GitHubAPIImporter,
4747
gitlab.GitLabAPIImporter,
4848
npm.NpmImporter,
49-
pypa.PyPaImporter,
5049
nginx.NginxImporter,
5150
pysec.PyPIImporter,
5251
alpine_linux.AlpineImporter,
@@ -75,6 +74,7 @@
7574
github_osv.GithubOSVImporter,
7675
epss.EPSSImporter,
7776
vulnrichment.VulnrichImporter,
77+
pypa_importer.PyPaImporterPipeline,
7878
]
7979

8080
# Rebind the list defined above to a mapping keyed by each importer's
# ``qualified_name`` so an importer class can be looked up by name.
IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}

vulnerabilities/management/commands/import.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from vulnerabilities.import_runner import ImportRunner
1515
from vulnerabilities.importers import IMPORTERS_REGISTRY
16+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
1617

1718

1819
class Command(BaseCommand):
@@ -57,6 +58,13 @@ def import_data(self, importers):
5758

5859
for importer in importers:
5960
self.stdout.write(f"Importing data using {importer.qualified_name}")
61+
if issubclass(importer, VulnerableCodeBaseImporterPipeline):
62+
status, error = importer().execute()
63+
if status != 0:
64+
self.stdout.write(error)
65+
failed_importers.append(importer.qualified_name)
66+
continue
67+
6068
try:
6169
ImportRunner(importer).run()
6270
self.stdout.write(

vulnerabilities/pipelines/__init__.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,16 @@
99
import logging
1010
from datetime import datetime
1111
from datetime import timezone
12+
from traceback import format_exc as traceback_format_exc
13+
from typing import Iterable
1214

1315
from aboutcode.pipeline import BasePipeline
16+
from aboutcode.pipeline import LoopProgress
1417

18+
from vulnerabilities import import_runner
19+
from vulnerabilities.importer import AdvisoryData
20+
from vulnerabilities.improvers.default import DefaultImporter
21+
from vulnerabilities.models import Advisory
1522
from vulnerabilities.utils import classproperty
1623

1724
module_logger = logging.getLogger(__name__)
@@ -32,3 +39,104 @@ def qualified_name(cls):
3239
Fully qualified name prefixed with the module name of the pipeline used in logging.
3340
"""
3441
return f"{cls.__module__}.{cls.__qualname__}"
42+
43+
44+
class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
    """
    Base importer pipeline for importing advisories.

    Usage:
        Subclass this pipeline and implement the ``advisories_count`` and
        ``collect_advisories`` methods. Also override ``steps`` if needed.
    """

    license_url = None
    spdx_license_expression = None
    repo_url = None
    importer_name = None

    @classmethod
    def steps(cls):
        """Return the pipeline steps, in the order they are listed."""
        return (
            # Add step for downloading/cloning resource as required.
            cls.collect_and_store_advisories,
            cls.import_new_advisories,
            # Add step for removing downloaded/cloned resource as required.
        )

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield AdvisoryData for importer pipeline.

        Subclasses must override this method; the base implementation raises
        ``NotImplementedError``.
        """
        raise NotImplementedError

    def advisories_count(self) -> int:
        """
        Return the estimated AdvisoryData to be yielded by ``collect_advisories``.

        Used by ``collect_and_store_advisories`` to log the progress of advisory collection.
        Subclasses must override this method; the base implementation raises
        ``NotImplementedError``.
        """
        raise NotImplementedError

    def collect_and_store_advisories(self) -> None:
        """
        Store every advisory yielded by ``collect_advisories``.

        Resets ``self.new_advisories`` and passes each collected advisory to
        ``insert_advisory``, which appends the newly created Advisory objects
        to that list for the ``import_new_advisories`` step.
        """
        self.new_advisories = []

        collected_advisory_count = 0
        progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log)
        for advisory in progress.iter(self.collect_advisories()):
            self.insert_advisory(advisory=advisory)
            # NOTE: counts every advisory yielded by the collector, including
            # those whose insertion failed and was only logged.
            collected_advisory_count += 1

        self.log(f"Successfully collected {collected_advisory_count:,d} advisories")

    def insert_advisory(self, advisory: AdvisoryData) -> None:
        """
        Get or create the Advisory row matching ``advisory``.

        An Advisory created by this call is appended to ``self.new_advisories``.
        Any exception is logged with its traceback and not re-raised, so a
        single bad advisory does not abort the whole collection step.
        """
        try:
            obj, created = Advisory.objects.get_or_create(
                aliases=advisory.aliases,
                summary=advisory.summary,
                affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages],
                references=[ref.to_dict() for ref in advisory.references],
                date_published=advisory.date_published,
                weaknesses=advisory.weaknesses,
                # ``defaults`` are only applied when a new row is created; they
                # are not part of the lookup.
                defaults={
                    "created_by": self.qualified_name,
                    "date_collected": datetime.now(timezone.utc),
                },
                url=advisory.url,
            )
            if created:
                self.new_advisories.append(obj)
        except Exception as e:
            self.log(
                f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}",
                level=logging.ERROR,
            )

    def import_new_advisories(self) -> None:
        """
        Run ``import_advisory`` on every advisory in ``self.new_advisories``.

        Expects ``collect_and_store_advisories`` to have run first, since that
        step is what sets ``self.new_advisories``.
        """
        new_advisories_count = len(self.new_advisories)

        imported_advisory_count = 0
        progress = LoopProgress(total_iterations=new_advisories_count, logger=self.log)
        for advisory in progress.iter(self.new_advisories):
            self.import_advisory(advisory=advisory)
            # NOTE: counts every advisory handed to ``import_advisory``,
            # including those it skipped or failed to process.
            imported_advisory_count += 1

        self.log(f"Successfully imported {imported_advisory_count:,d} new advisories")

    def import_advisory(self, advisory: Advisory) -> None:
        """
        Compute inferences for ``advisory`` and process them.

        Advisories that already have ``date_imported`` set are skipped.
        Any exception is logged with its traceback (same format as
        ``insert_advisory``) and not re-raised.
        """
        if advisory.date_imported:
            return
        try:
            advisory_importer = DefaultImporter(advisories=[advisory])
            inferences = advisory_importer.get_inferences(advisory_data=advisory.to_advisory_data())
            import_runner.process_inferences(
                inferences=inferences,
                advisory=advisory,
                improver_name=self.qualified_name,
            )
        except Exception as e:
            self.log(
                f"Failed to process advisory: {advisory!r} with error {e!r} \n {traceback_format_exc()}",
                level=logging.ERROR,
            )
File renamed without changes.

vulnerabilities/tests/test_pypa.py renamed to vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py

File renamed without changes.

vulnerabilities/tests/test_data_source.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
from vulnerabilities.importers.istio import IstioImporter
2727
from vulnerabilities.importers.mozilla import MozillaImporter
2828
from vulnerabilities.importers.npm import NpmImporter
29-
from vulnerabilities.importers.pypa import PyPaImporter
3029
from vulnerabilities.importers.retiredotnet import RetireDotnetImporter
3130
from vulnerabilities.importers.ruby import RubyImporter
3231
from vulnerabilities.oval_parser import OvalParser
@@ -124,7 +123,6 @@ def test_git_importer(mock_clone):
124123
MozillaImporter,
125124
NpmImporter,
126125
RetireDotnetImporter,
127-
PyPaImporter,
128126
RubyImporter,
129127
GithubOSVImporter,
130128
],

0 commit comments

Comments
 (0)