Skip to content

Commit dc169d8

Browse files
committed
Add support for generating commit and patch URLs for all supported Git repositories.
Treat GitLab subdomains as GitLab when using the repository URL instead of a generic type. Add support for the salsa.debian.org, gitlab.eclipse.org, and forge.fedoraproject.org domains. Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent 92da2f2 commit dc169d8

4 files changed

Lines changed: 260 additions & 64 deletions

File tree

src/packageurl/contrib/purl2url.py

Lines changed: 149 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ def get_repo_download_url_by_package_type(
5353

5454
repo_router = Router()
5555
download_router = Router()
56+
commit_router = Router()
57+
patch_router = Router()
5658

5759

5860
def _get_url_from_router(router, purl):
@@ -70,6 +72,30 @@ def get_repo_url(purl):
7072
return _get_url_from_router(repo_router, purl)
7173

7274

75+
def get_commit_url(purl):
    """
    Return a Commit URL inferred from the `purl` string.

    The `commit_router` is consulted first through `_get_url_from_router`.
    When it yields nothing truthy, the value of the ``commit_url`` qualifier
    of the parsed purl is returned instead (None when the qualifier is absent).
    """
    routed_url = _get_url_from_router(commit_router, purl)
    if not routed_url:
        # No route produced a URL: fall back to an explicit qualifier.
        parsed_qualifiers = PackageURL.from_string(purl).qualifiers
        return parsed_qualifiers.get("commit_url", None)
    return routed_url
85+
86+
87+
def get_patch_url(purl):
    """
    Return a Patch URL inferred from the `purl` string.

    The `patch_router` is consulted first through `_get_url_from_router`.
    When it yields nothing truthy, the value of the ``patch_url`` qualifier
    of the parsed purl is returned instead (None when the qualifier is absent).
    """
    routed_url = _get_url_from_router(patch_router, purl)
    if not routed_url:
        # No route produced a URL: fall back to an explicit qualifier.
        parsed_qualifiers = PackageURL.from_string(purl).qualifiers
        return parsed_qualifiers.get("patch_url", None)
    return routed_url
97+
98+
7399
def get_download_url(purl):
74100
"""
75101
Return a download URL inferred from the `purl` string.
@@ -160,18 +186,29 @@ def build_github_repo_url(purl):
160186
return repo_url
161187

162188

189+
SUB_GITLAB_DOMAINS = [r"^git\.codelinaro\.org", r"^salsa\.debian\.org", r"^gitlab\.(?!com\b)[^/]+"]
190+
191+
163192
@repo_router.route("pkg:gitlab/.*")
def build_gitlab_repo_url(purl):
    """
    Return a gitlab repo URL from the `purl` string.

    Return None when the purl has no namespace or no name. When the purl has a
    ``repository_url`` qualifier whose scheme-less form matches one of the
    SUB_GITLAB_DOMAINS patterns, the URL is built as
    ``https://{namespace}/{name}``; in every other case the repository is
    assumed to live on gitlab.com.
    """
    parsed = PackageURL.from_string(purl)
    namespace, name = parsed.namespace, parsed.name
    repository_url = parsed.qualifiers.get("repository_url")

    if not namespace or not name:
        return

    if repository_url:
        # Drop the scheme so the domain patterns can match from the host on.
        host_and_path = re.sub(r"^https?://", "", repository_url)
        if any(re.match(domain, host_and_path) for domain in SUB_GITLAB_DOMAINS):
            return f"https://{namespace}/{name}"

    return f"https://gitlab.com/{namespace}/{name}"
175212

176213

177214
GIT_REPO_GENERIC = {
@@ -182,8 +219,9 @@ def build_gitlab_repo_url(purl):
182219
"cgit\.git\.savannah\.gnu\.org",
183220
"web\.git\.kernel\.org",
184221
): {
185-
"commit_url": "https://{namespace}/{name}.git/commit/?id={version}",
186222
"repo_url": "https://{namespace}/{name}.git",
223+
"commit_url": "https://{namespace}/{name}.git/commit/?id={version}",
224+
"patch_url": "https://{namespace}/{name}.git/patch/?id={version}",
187225
},
188226
# gitiles
189227
(
@@ -192,13 +230,15 @@ def build_gitlab_repo_url(purl):
192230
r"chromium\.googlesource\.com",
193231
r"gerrit\.googlesource\.com",
194232
): {
195-
"commit_url": "https://{namespace}/{name}/+/{version}",
196233
"repo_url": "https://{namespace}/{name}",
234+
"commit_url": "https://{namespace}/{name}/+/{version}",
235+
"patch_url": "https://{namespace}/{name}/+/{version}^!?format=TEXT", # base64 encoded
197236
},
198237
# allura
199238
(r"sourceforge\.net", r"forge-allura\.apache\.org"): {
200-
"commit_url": "https://{namespace}/{name}/ci/{version}",
201239
"repo_url": "https://{namespace}/{name}",
240+
"commit_url": "https://{namespace}/{name}/ci/{version}",
241+
"patch_url": "https://{namespace}/{name}/ci/{version}", # FIXME
202242
},
203243
# gitweb
204244
(
@@ -210,35 +250,29 @@ def build_gitlab_repo_url(purl):
210250
): {
211251
"commit_url": "https://{namespace}/?p={name}.git;a=commit;h={version}",
212252
"repo_url": "https://{namespace}/?p={name}.git",
253+
"patch_url": "https://{namespace}/?p={name}.git;a=patch;h={version}",
213254
},
214255
# gitea / forgejo
215256
(
216257
r"codeberg\.org",
217258
r"gitea\.com",
259+
r"forge\.fedoraproject\.org",
218260
): {
219261
"commit_url": "https://{namespace}/{name}/commit/{version}",
220262
"repo_url": "https://{namespace}/{name}",
221-
},
222-
# sub gitlab ( excludes gitlab.com )
223-
(
224-
r"git\.codelinaro\.org.*",
225-
r"gitlab\.(?!com\b)[^/]+",
226-
): {
227-
"commit_url": "https://{namespace}/{name}/-/commit/{version}",
228-
"repo_url": "https://{namespace}/{name}",
263+
"patch_url": "https://{namespace}/{name}/commit/{version}.patch",
229264
},
230265
}
231266

232267

233268
@repo_router.route("pkg:generic/.*")
234269
def build_generic_repo_url(purl):
235270
"""
236-
Return a Commit URL from the `purl` string.
271+
Return a Repo URL from the `purl` string.
237272
"""
238273
purl_data = PackageURL.from_string(purl)
239274
name = purl_data.name
240275
namespace = purl_data.namespace
241-
version = purl_data.version
242276

243277
if not (namespace and name):
244278
return
@@ -248,14 +282,109 @@ def build_generic_repo_url(purl):
248282
if not re.match(pattern, namespace):
249283
continue
250284

251-
if version:
252-
return template_url["commit_url"].format(
253-
namespace=namespace, name=name, version=version
254-
)
255285
return template_url["repo_url"].format(namespace=namespace, name=name)
256286
return
257287

258288

289+
@commit_router.route("pkg:generic/.*")
def build_generic_commit_url(purl):
    """
    Return a Commit URL from the `purl` string.

    The purl namespace is matched against each group of patterns in
    GIT_REPO_GENERIC, in order; the "commit_url" template of the first group
    with a matching pattern is filled in with the namespace, name and version.
    Return None when namespace, name or version is missing, or when no pattern
    matches.
    """
    parsed = PackageURL.from_string(purl)
    namespace, name, version = parsed.namespace, parsed.name, parsed.version

    if not namespace or not name or not version:
        return

    for domain_patterns, templates in GIT_REPO_GENERIC.items():
        if any(re.match(domain, namespace) for domain in domain_patterns):
            commit_template = templates["commit_url"]
            return commit_template.format(namespace=namespace, name=name, version=version)
    return
311+
312+
313+
@patch_router.route("pkg:generic/.*")
def build_generic_patch_url(purl):
    """
    Return a Patch URL from the `purl` string.

    The purl namespace is matched against each group of patterns in
    GIT_REPO_GENERIC, in order; the "patch_url" template of the first group
    with a matching pattern is filled in with the namespace, name and version.
    Return None when namespace, name or version is missing, or when no pattern
    matches.
    """
    parsed = PackageURL.from_string(purl)
    namespace, name, version = parsed.namespace, parsed.name, parsed.version

    if not namespace or not name or not version:
        return

    for domain_patterns, templates in GIT_REPO_GENERIC.items():
        if any(re.match(domain, namespace) for domain in domain_patterns):
            patch_template = templates["patch_url"]
            return patch_template.format(namespace=namespace, name=name, version=version)
    return
333+
334+
335+
@commit_router.route("pkg:gitlab/.*", "pkg:bitbucket/.*", "pkg:github/.*")
def build_main_commit_url(purl):
    """
    Return a github/gitlab/bitbucket Commit URL from the `purl` string.

    Return None when the purl has no namespace, no name or no version.
    A gitlab purl that carries a ``repository_url`` qualifier is handled as a
    "sub-gitlab" instance: its URL is built as
    ``https://{namespace}/{name}/-/commit/{version}``, so the namespace is
    expected to start with the instance host.
    """
    purl_data = PackageURL.from_string(purl)
    purl_type = purl_data.type
    name = purl_data.name
    namespace = purl_data.namespace
    version = purl_data.version
    if not (namespace and name and version):
        return

    if purl_type == "gitlab" and purl_data.qualifiers.get("repository_url"):
        purl_type = "sub-gitlab"

    # These f-strings are already fully interpolated. They must not be passed
    # through str.format() again: that second pass would re-parse any literal
    # "{" or "}" found in the namespace, name or version and either raise or
    # substitute the wrong value.
    commit_urls = {
        "github": f"https://github.com/{namespace}/{name}/commit/{version}",
        "gitlab": f"https://gitlab.com/{namespace}/{name}/-/commit/{version}",
        "sub-gitlab": f"https://{namespace}/{name}/-/commit/{version}",
        "bitbucket": f"https://bitbucket.org/{namespace}/{name}/commits/{version}",
    }

    return commit_urls[purl_type]
359+
360+
361+
@patch_router.route("pkg:gitlab/.*", "pkg:bitbucket/.*", "pkg:github/.*")
def build_main_patch_url(purl):
    """
    Return a github/gitlab/bitbucket Patch URL from the `purl` string.

    Return None when the purl has no namespace, no name or no version.
    A gitlab purl that carries a ``repository_url`` qualifier is handled as a
    "sub-gitlab" instance: its URL is built as
    ``https://{namespace}/{name}/-/commit/{version}.patch``, so the namespace
    is expected to start with the instance host.
    """
    purl_data = PackageURL.from_string(purl)
    purl_type = purl_data.type
    name = purl_data.name
    namespace = purl_data.namespace
    version = purl_data.version

    if not (namespace and name and version):
        return

    if purl_type == "gitlab" and purl_data.qualifiers.get("repository_url"):
        purl_type = "sub-gitlab"

    # These f-strings are already fully interpolated. They must not be passed
    # through str.format() again: that second pass would re-parse any literal
    # "{" or "}" found in the namespace, name or version and either raise or
    # substitute the wrong value.
    patch_urls = {
        "github": f"https://github.com/{namespace}/{name}/commit/{version}.patch",
        "gitlab": f"https://gitlab.com/{namespace}/{name}/-/commit/{version}.patch",
        "sub-gitlab": f"https://{namespace}/{name}/-/commit/{version}.patch",
        "bitbucket": f"https://bitbucket.org/{namespace}/{name}/commits/{version}/raw",
    }

    return patch_urls[purl_type]
386+
387+
259388
@repo_router.route("pkg:(gem|rubygems)/.*")
260389
def build_rubygems_repo_url(purl):
261390
"""

src/packageurl/contrib/url2purl.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,7 @@ def build_route_regex(domain_patterns, path_suffix="/.*"):
675675
return rf"https?://({domain_pattern}){path_suffix}"
676676

677677

678-
SUB_GITLAB_DOMAINS = [r"git\.codelinaro\.org", r"gitlab\.(?!com\b)[^/]+"]
678+
SUB_GITLAB_DOMAINS = [r"git\.codelinaro\.org", r"salsa\.debian\.org", r"gitlab\.(?!com\b)[^/]+"]
679679
SUB_GITLAB_ROUTE_REGEX = build_route_regex(SUB_GITLAB_DOMAINS)
680680

681681

@@ -686,29 +686,46 @@ def build_gitlab_sub_purl(url):
686686
For example:
687687
https://gitlab.gnome.org/GNOME/gimp
688688
https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm
689+
https://gitlab.eclipse.org/eclipse/asciidoc-lang/asciidoc-lang
689690
https://gitlab.gnome.org/GNOME/gimp/-/commit/112a5e038f0646eae5ae314988ec074433d2b365
690691
https://git.codelinaro.org/linaro/qcom/project/-/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
691692
"""
693+
archive_pattern = (
694+
r"^https?://" r"(?P<namespace>[^/]+)/" r".+?/-/archive/[^/]+/" r"(?P<name>[^/]+)$"
695+
)
692696

693-
gitlab_sub_commit_pattern = (
697+
commit_pattern = (
694698
r"^https?://"
695699
r"(?P<namespace>.+?)/"
696700
r"(?P<name>[^/]+)"
697701
r"(?:/-/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
698702
r"/?$"
699703
)
700704

701-
commit_match = re.search(gitlab_sub_commit_pattern, url)
705+
archive_match = re.search(archive_pattern, url)
706+
if archive_match:
707+
namespace = archive_match.group("namespace")
708+
name = archive_match.group("name")
709+
return PackageURL(
710+
type="gitlab", namespace=namespace, name=name, qualifiers={"download_url": url}
711+
)
712+
713+
commit_match = re.search(commit_pattern, url)
702714
if commit_match:
715+
namespace = commit_match.group("namespace")
716+
name = commit_match.group("name")
717+
version = commit_match.group("version")
718+
703719
return PackageURL(
704-
type="generic",
705-
namespace=commit_match.group("namespace"),
706-
name=commit_match.group("name"),
707-
version=commit_match.group("version"),
720+
type="gitlab",
721+
namespace=namespace,
722+
name=name,
723+
version=version,
724+
qualifiers={"repository_url": f"https://{namespace}/{name}"},
708725
)
709726

710727

711-
GITEA_DOMAINS = ["codeberg\.org", "gitea\.com"]
728+
GITEA_DOMAINS = [r"codeberg\.org", r"gitea\.com", r"forge\.fedoraproject\.org"]
712729
GITEA_ROUTE_REGEX = build_route_regex(GITEA_DOMAINS)
713730

714731

@@ -722,7 +739,6 @@ def build_gitea_purl(url):
722739
https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
723740
https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c
724741
"""
725-
726742
gitea_commit_pattern = (
727743
r"^https?://"
728744
r"(?P<namespace>.+?)/"
@@ -771,7 +787,7 @@ def build_cgit_purl(url):
771787
"""
772788

773789
# https://git.kernel.org/stable/c/<hash>
774-
kernel_shorthand = r"^https?://git\.kernel\.org/stable/c/" r"(?P<version>[0-9a-fA-F]{7,64})/?$"
790+
kernel_shorthand = r"^https?://git\.kernel\.org/stable/c/(?P<version>[0-9a-fA-F]{7,64})/?$"
775791

776792
cgit_project_pattern = (
777793
r"^https?://"

0 commit comments

Comments
 (0)