77# See https://aboutcode.org for more information about nexB OSS projects.
88#
99
10+ import re
11+
1012from aboutcode .pipeline import LoopProgress
1113from packageurl .contrib .url2purl import url2purl
1214
15+ from vulnerabilities .models import AffectedByPackageRelatedVulnerability
1316from vulnerabilities .models import CodeFix
17+ from vulnerabilities .models import FixingPackageRelatedVulnerability
1418from vulnerabilities .models import Package
1519from vulnerabilities .models import VulnerabilityReference
1620from vulnerabilities .pipelines import VulnerableCodePipeline
1721
1822
def is_vcs_url_already_processed(commit_id):
    """
    Return True when at least one CodeFix entry lists ``commit_id`` in its commits.

    The pipeline in this module calls this with a normalized VCS URL as
    ``commit_id`` to decide whether that URL can be skipped.
    """
    matching_code_fixes = CodeFix.objects.filter(commits__contains=[commit_id])
    return matching_code_fixes.exists()
2628
2729
2830class CollectFixCommitsPipeline (VulnerableCodePipeline ):
@@ -38,83 +40,54 @@ def steps(cls):
3840 return (cls .collect_and_store_fix_commits ,)
3941
4042 def collect_and_store_fix_commits (self ):
41- references = VulnerabilityReference .objects .prefetch_related ("vulnerabilities" ).distinct ()
43+ affected_by_package_related_vulnerabilities = (
44+ AffectedByPackageRelatedVulnerability .objects .all ().prefetch_related (
45+ "vulnerability" , "vulnerability__references"
46+ )
47+ )
4248
43- self .log (f"Processing { references .count ():,d} references to collect fix commits." )
49+ self .log (
50+ f"Processing { affected_by_package_related_vulnerabilities .count ():,d} references to collect fix commits."
51+ )
4452
4553 created_fix_count = 0
46- progress = LoopProgress (total_iterations = references .count (), logger = self .log )
47-
48- Reference
49- AffectedByPackageRelatedVulnerability
50- # FixingPackageRelatedVulnerability
54+ progress = LoopProgress (
55+ total_iterations = affected_by_package_related_vulnerabilities .count (), logger = self .log
56+ )
5157
58+ for apv in progress .iter (
59+ affected_by_package_related_vulnerabilities .paginated (per_page = 500 )
60+ ):
61+ vulnerability = apv .vulnerability
62+ for reference in vulnerability .references :
5263
53- for apv in AffectedByPackageRelatedVulnerability .objects .all ():
54- vuln = apv .vulnerability
55- for ref in vuln .references :
64+ if not is_vcs_url (reference .url ):
65+ continue
5666
57- for reference in progress .iter (references .paginated (per_page = 500 )):
58- for vulnerability in reference .vulnerabilities .all ():
5967 vcs_url = normalize_vcs_url (repo_url = reference .url )
6068
6169 if not vcs_url :
6270 continue
6371
6472 # Skip if already processed
65- if is_reference_already_processed ( reference_url = reference . url , commit_id = vcs_url ):
73+ if is_vcs_url_already_processed ( commit_id = vcs_url ):
6674 self .log (
6775 f"Skipping already processed reference: { reference .url } with VCS URL { vcs_url } "
6876 )
6977 continue
70- purl = url2purl (vcs_url )
71- if not purl :
72- self .log (f"Could not create purl from url: { vcs_url } " )
73- continue
74- package = self .get_or_create_package (purl )
75- codefix = self .create_codefix_entry (
76- vulnerability = vulnerability ,
77- package = package ,
78- vcs_url = vcs_url ,
79- reference = reference .url ,
78+ code_fix , created = CodeFix .objects .get_or_create (
79+ commits = [vcs_url ],
80+ affected_package_vulnerability = apv ,
8081 )
81- if codefix :
82+
83+ if created :
8284 created_fix_count += 1
85+ self .log (
86+ f"Created CodeFix entry for reference: { reference .url } with VCS URL { vcs_url } "
87+ )
8388
8489 self .log (f"Successfully created { created_fix_count :,d} CodeFix entries." )
8590
86- def get_or_create_package (self , purl ):
87- """
88- Get or create a Package object from a Package URL.
89- """
90- try :
91- package , _ = Package .objects .get_or_create_from_purl (purl )
92- return package
93- except Exception as e :
94- self .log (f"Error creating package from purl { purl } : { e } " )
95- return None
96-
97- def create_codefix_entry (self , vulnerability , package , vcs_url , reference ):
98- """
99- Create a CodeFix entry associated with the given vulnerability and package.
100- """
101- try :
102- codefix , created = CodeFix .objects .get_or_create (
103- base_version = package ,
104- defaults = {
105- "commits" : [vcs_url ],
106- "references" : [reference ],
107- },
108- )
109- if created :
110- AffectedByPackageRelatedVulnerability .objects .get
111- codefix .package_vulnerabilities .add (vulnerability )
112- codefix .save ()
113- return codefix
114- except Exception as e :
115- self .log (f"Error creating CodeFix entry: { e } " )
116- return
117-
11891
11992PLAIN_URLS = (
12093 "https://" ,
@@ -211,3 +184,72 @@ def normalize_vcs_url(repo_url, vcs_tool=None):
211184 # implicit github, but that's only on NPM?
212185 return f"https://github.com/{ repo_url } "
213186 return repo_url
187+
188+
def is_vcs_url(repo_url):
    """
    Check if a given URL or string matches a valid VCS (Version Control System) URL.

    Supports:
    - ``git://`` and ``ssh://`` URLs on any host
    - ``http://`` and ``https://`` URLs that point at a known code hosting
      site (github.com, gitlab.com, bitbucket.org) with at least an
      ``owner/repo`` path, or whose path ends with ``.git`` on any host
    - SCP-like SSH syntax (e.g., git@github.com:user/repo.git)
    - Shortcut syntax (e.g., github:user/repo, gitlab:group/repo)
    - GitHub shortcut (e.g., user/repo)

    Plain web links such as advisory pages are rejected, so this can be used
    to filter arbitrary reference URLs.

    Args:
        repo_url (str): The repository URL or shortcut to validate.

    Returns:
        bool: True if the string is a valid VCS URL, False otherwise.

    Examples:
        >>> is_vcs_url("git://github.com/angular/di.js.git")
        True
        >>> is_vcs_url("github:user/repo")
        True
        >>> is_vcs_url("user/repo")
        True
        >>> is_vcs_url("https://github.com/user/repo.git")
        True
        >>> is_vcs_url("git@github.com:user/repo.git")
        True
        >>> is_vcs_url("http://github.com/isaacs/nopt")
        True
        >>> is_vcs_url("https://gitlab.com/foo/private.git")
        True
        >>> is_vcs_url("git@gitlab.com:foo/private.git")
        True
        >>> is_vcs_url("bitbucket:example/repo")
        True
        >>> is_vcs_url("gist:11081aaa281")
        True
        >>> is_vcs_url("https://git.example.org/project/repo.git")
        True
        >>> is_vcs_url("ftp://example.com/not-a-repo")
        False
        >>> is_vcs_url("random-string")
        False
        >>> is_vcs_url("https://example.com/not-a-repo")
        False
        >>> is_vcs_url("https://github.com/")
        False
    """
    if not repo_url or not isinstance(repo_url, str):
        return False

    repo_url = repo_url.strip()
    if not repo_url:
        return False

    # 1. git:// and ssh:// are VCS transports whatever the host is.
    if re.match(r"^(git|ssh)://", repo_url):
        return True

    # 2. http(s):// is used by every web page, so the scheme alone proves
    #    nothing: require a known hosting site or an explicit ".git" path.
    web_url = re.match(
        r"^https?://(?:[^/@\s]+@)?(?P<host>[^/:\s]+)(?::\d+)?(?P<path>/[^\s?#]*)?",
        repo_url,
    )
    if web_url:
        known_hosts = ("github.com", "gitlab.com", "bitbucket.org")
        host = web_url.group("host").lower()
        if host.startswith("www."):
            host = host[len("www."):]
        path = (web_url.group("path") or "").rstrip("/")

        if re.search(r"[^/]\.git$", path):
            return True

        segments = [segment for segment in path.split("/") if segment]
        # A repository on a hosting site is at least "<owner>/<repo>".
        return host in known_hosts and len(segments) >= 2

    # 3. Match SCP-like SSH URLs (e.g., git@github.com:user/repo.git),
    #    including hosts with hyphens or subdomains.
    if re.match(r"^git@[\w\-]+(\.[\w\-]+)+:[\w\-./]+$", repo_url):
        return True

    # 4. Match shortcut syntax (e.g., github:user/repo)
    if re.match(r"^(github|gitlab|bitbucket|gist):[\w\-./]+$", repo_url):
        return True

    # 5. Match implicit GitHub shortcut (e.g., user/repo)
    if re.match(r"^[\w\-]+/[\w\-]+$", repo_url):
        return True

    return False
0 commit comments