Skip to content

Commit d02949b

Browse files
committed
Update url2purl/purl2url and simplify the regex pattern
Add support for gitbox.apache.org.

Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent c47acb5 commit d02949b

3 files changed

Lines changed: 31 additions & 42 deletions

File tree

src/packageurl/contrib/purl2url.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ def build_gitlab_repo_url(purl):
179179
(
180180
r"git\.kernel\.org",
181181
r"gitweb\.gentoo\.org",
182+
"cgit\.git\.savannah\.gnu\.org",
183+
"web\.git\.kernel\.org",
182184
): {
183185
"commit_url": "https://{namespace}/{name}.git/commit/?id={version}",
184186
"repo_url": "https://{namespace}/{name}.git",
@@ -194,15 +196,20 @@ def build_gitlab_repo_url(purl):
194196
"repo_url": "https://{namespace}/{name}",
195197
},
196198
# allura
197-
(r"sourceforge\.net", r"forge-allura\.apache\.org"): {
199+
(
200+
r"sourceforge\.net",
201+
r"forge-allura\.apache\.org"
202+
): {
198203
"commit_url": "https://{namespace}/{name}/ci/{version}",
199204
"repo_url": "https://{namespace}/{name}",
200205
},
201206
# gitweb
202207
(
203-
r"gcc\.gnu\.org",
208+
r"gcc\.gnu\.org/git",
204209
r"git\.postgresql\.org",
205-
r"sourceware\.org/git",
210+
"sourceware\.org",
211+
"git\.openssl\.org",
212+
"gitbox\.apache\.org",
206213
): {
207214
"commit_url": "https://{namespace}/?p={name}.git;a=commit;h={version}",
208215
"repo_url": "https://{namespace}/?p={name}.git",
@@ -225,7 +232,6 @@ def build_gitlab_repo_url(purl):
225232
},
226233
}
227234

228-
229235
@repo_router.route("pkg:generic/.*")
230236
def build_generic_repo_url(purl):
231237
"""

src/packageurl/contrib/url2purl.py

Lines changed: 17 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -692,22 +692,17 @@ def build_gitlab_sub_purl(url):
692692

693693
gitlab_sub_commit_pattern = (
694694
r"^https?://"
695-
r"(?P<domain>[^/]+)/"
696695
r"(?P<namespace>.+?)/"
697696
r"(?P<name>[^/]+)"
698-
r"(?:/(?:-/)?commit/(?P<version>[0-9a-fA-F]{7,64}))?"
697+
r"(?:/-/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
699698
r"/?$"
700699
)
701700

702701
commit_match = re.search(gitlab_sub_commit_pattern, url)
703702
if commit_match:
704-
domain = commit_match.group("domain")
705-
raw_namespace = commit_match.group("namespace").strip("/")
706-
namespace = f"{domain}/{raw_namespace}"
707-
708703
return PackageURL(
709704
type="generic",
710-
namespace=namespace,
705+
namespace=commit_match.group("namespace"),
711706
name=commit_match.group("name"),
712707
version=commit_match.group("version"),
713708
)
@@ -730,27 +725,21 @@ def build_gitea_purl(url):
730725

731726
gitea_commit_pattern = (
732727
r"^https?://"
733-
r"(?P<domain>[^/]+)/"
734-
r"(?P<namespace>[^/]+)/"
728+
r"(?P<namespace>.+?)/"
735729
r"(?P<name>[^/]+)"
736730
r"(?:/commit/(?P<version>[0-9a-fA-F]{7,64}))?"
737731
r"/?$"
738732
)
739733

740734
commit_match = re.search(gitea_commit_pattern, url)
741735
if commit_match:
742-
domain = commit_match.group("domain")
743-
namespace = f"{domain}/{commit_match.group('namespace')}"
744-
745736
return PackageURL(
746737
type="generic",
747-
namespace=namespace,
738+
namespace=commit_match.group('namespace'),
748739
name=commit_match.group("name"),
749740
version=commit_match.group("version"),
750741
)
751742

752-
return
753-
754743

755744
CGIT_DOMAINS = [
756745
r"git\.kernel\.org",
@@ -781,15 +770,14 @@ def build_cgit_purl(url):
781770
https://git.kernel.org/stable/c/9a9a8fe26751334b7739193a94eba741073b8a55
782771
"""
783772

773+
# https://git.kernel.org/stable/c/<hash>
784774
kernel_shorthand = (
785-
r"^https?://(?P<domain>git\.kernel\.org)/"
786-
r"(?P<name>[^/]+)/c/"
775+
r"^https?://git\.kernel\.org/stable/c/"
787776
r"(?P<version>[0-9a-fA-F]{7,64})/?$"
788777
)
789778

790779
cgit_project_pattern = (
791780
r"^https?://"
792-
r"(?P<domain>[^/]+)/"
793781
r"(?P<namespace>.+?)/"
794782
r"(?P<name>[^/]+?)"
795783
r"(?:\.git)?"
@@ -799,17 +787,19 @@ def build_cgit_purl(url):
799787

800788
if match := re.search(kernel_shorthand, url):
801789
res = match.groupdict()
802-
namespace = res["domain"]
790+
namespace = "git.kernel.org/pub/scm/linux/kernel/git/stable/"
791+
name = "linux"
803792
elif match := re.search(cgit_project_pattern, url):
804793
res = match.groupdict()
805-
namespace = f"{res['domain']}/{res['namespace']}"
794+
name = res["name"]
795+
namespace = res['namespace']
806796
else:
807797
return None
808798

809799
return PackageURL(
810800
type="generic",
811801
namespace=namespace,
812-
name=res["name"],
802+
name=name,
813803
version=res["version"],
814804
qualifiers={},
815805
subpath="",
@@ -838,21 +828,17 @@ def build_gitiles_purl(url):
838828

839829
gitiles_project_pattern = (
840830
r"^https?://"
841-
r"(?P<domain>[^/]+)/"
842-
r"(?:(?P<namespace>(?:(?!/\+/).)+)/)?"
843-
r"(?P<name>(?:(?!/\+/)[^/])+)"
831+
r"(?P<namespace>(?:(?!/\+/).)+)/"
832+
r"(?P<name>[^/]+)"
844833
r"(?:/\+/(?P<version>[0-9a-fA-F]{7,64}))?"
845834
r"/?$"
846835
)
847836

848837
match = re.search(gitiles_project_pattern, url)
849838
if match:
850-
raw_namespace = match.group("namespace")
851-
domain = match.group("domain")
852-
namespace = f"{domain}/{raw_namespace}" if raw_namespace else domain
853839
return PackageURL(
854840
type="generic",
855-
namespace=namespace,
841+
namespace=match.group("namespace"),
856842
name=match.group("name"),
857843
version=match.group("version"),
858844
qualifiers={},
@@ -879,7 +865,6 @@ def build_allura_purl(url):
879865

880866
allura_pattern = (
881867
r"^https?://"
882-
r"(?P<domain>[^/]+)/"
883868
r"(?P<namespace>.+?)/"
884869
r"(?P<name>[^/]+?)"
885870
r"(?:/ci/(?P<version>[0-9a-fA-F]{7,64}))?"
@@ -888,11 +873,9 @@ def build_allura_purl(url):
888873

889874
commit_match = re.search(allura_pattern, url)
890875
if commit_match:
891-
domain = commit_match.group("domain")
892-
namespace = f"{domain}/{commit_match.group('namespace')}"
893876
return PackageURL(
894877
type="generic",
895-
namespace=namespace,
878+
namespace=commit_match.group('namespace'),
896879
name=commit_match.group("name"),
897880
version=commit_match.group("version"),
898881
qualifiers={},
@@ -905,6 +888,7 @@ def build_allura_purl(url):
905888
r"git\.postgresql\.org/gitweb",
906889
"sourceware\.org/git",
907890
"git\.openssl\.org/gitweb",
891+
"gitbox\.apache\.org",
908892
]
909893
GITWEB_ROUTE_REGEX = build_route_regex(GITWEB_DOMAINS)
910894

@@ -920,6 +904,7 @@ def build_gitweb_purl(url):
920904
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339
921905
https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2
922906
https://sourceware.org/git/?p=glibc.git;a=commit;h=dedebed24f77762eea7d3c5ed2739a90a4d60461
907+
https://gitbox.apache.org/repos/asf?p=xalan-java.git;a=commit;h=da3e0d06b467247643ce04e88d3346739d119f21
923908
"""
924909

925910
gitweb_pattern = (

tests/contrib/data/url2purl.json

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -289,25 +289,22 @@
289289
"https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d": "pkg:generic/cgit.git.savannah.gnu.org/cgit/uddf@98c41e131dc952aee43d4ec392b80ca4c426be8d",
290290
"https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git/commit/?id=7457fe9541b5162f285454947448d553a5d5a531": "pkg:generic/git.kernel.org/pub/scm/virt/kvm/mst/qemu@7457fe9541b5162f285454947448d553a5d5a531",
291291
"https://git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev.git/commit/?h=for-next&id=bd771cf5c4254511cc4abb88f3dab3bd58bdf8e8": "pkg:generic/git.kernel.org/pub/scm/linux/kernel/git/deller/linux-fbdev@bd771cf5c4254511cc4abb88f3dab3bd58bdf8e8",
292-
"https://git.kernel.org/stable/c/9a9a8fe26751334b7739193a94eba741073b8a55": "pkg:generic/git.kernel.org/stable@9a9a8fe26751334b7739193a94eba741073b8a55",
292+
"https://git.kernel.org/stable/c/9a9a8fe26751334b7739193a94eba741073b8a55": "pkg:generic/git.kernel.org/pub/scm/linux/kernel/git/stable/linux@9a9a8fe26751334b7739193a94eba741073b8a55",
293293
"https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825": "pkg:generic/gitweb.gentoo.org/dev/darkside@8d4b0836f3b6ab7075212926d9aad0b50246d825",
294294
"https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=f73ae47c5e48010f504f3f55567152258f3013ae": "pkg:generic/gitweb.gentoo.org/repo/gentoo@f73ae47c5e48010f504f3f55567152258f3013ae",
295-
296295
"https://android.googlesource.com/platform/frameworks/base/+/b4da73a935a8c906ff5df562155824d63ac849ab": "pkg:generic/android.googlesource.com/platform/frameworks/base@b4da73a935a8c906ff5df562155824d63ac849ab",
297296
"https://android.googlesource.com/device/generic/vulkan-cereal/+/240dedcb0fa917b3d2dcc4a9d4c332697c5e48a0": "pkg:generic/android.googlesource.com/device/generic/vulkan-cereal@240dedcb0fa917b3d2dcc4a9d4c332697c5e48a0",
298297
"https://chromium.googlesource.com/aosp/platform/external/dbus-binding-generator/+/7574c671c7c64aab957dc507fffff3c8c38dc7cb": "pkg:generic/chromium.googlesource.com/aosp/platform/external/dbus-binding-generator@7574c671c7c64aab957dc507fffff3c8c38dc7cb",
299298
"https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3": "pkg:generic/aomedia.googlesource.com/libavifinfo@43716e9c34d3389b4882fbd1a81c04543ed04fe3",
300299
"https://gerrit.googlesource.com/gerrit/+/45071d6977932bca5a1427c8abad24710fed2e33": "pkg:generic/gerrit.googlesource.com/gerrit@45071d6977932bca5a1427c8abad24710fed2e33",
301-
302300
"https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d": "pkg:generic/sourceforge.net/p/djvu/djvulibre-git@e15d51510048927f172f1bf1f27ede65907d940d",
303301
"https://sourceforge.net/p/expat/code_git/ci/f0bec73b018caa07d3e75ec8dd967f3785d71bde": "pkg:generic/sourceforge.net/p/expat/code_git@f0bec73b018caa07d3e75ec8dd967f3785d71bde",
304302
"https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946": "pkg:generic/forge-allura.apache.org/p/allura/git@674e070e5ca7db7c75cf61d8efd2a3e3e49bd946",
305-
306303
"https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339": "pkg:generic/gcc.gnu.org/git/gcc@82cc94e5fb69d1c45a386f83798251de5bff9339",
307304
"https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2": "pkg:generic/git.postgresql.org/gitweb/hamn@a796b71a5b3fe7f751f1086a08cb114b9877dea2",
308305
"https://git.openssl.org/gitweb/?p=openssl.git;a=commitdiff;h=1ad73b4d27bd8c1b369a3cd453681d3a4f1bb9b2": "pkg:generic/git.openssl.org/gitweb/openssl@1ad73b4d27bd8c1b369a3cd453681d3a4f1bb9b2",
309306
"https://sourceware.org/git/?p=bunsen.git;a=commit;h=6c55933f37099517e050c923527b0b2267e1deed": "pkg:generic/sourceware.org/git/bunsen@6c55933f37099517e050c923527b0b2267e1deed",
310-
307+
"https://gitbox.apache.org/repos/asf?p=xalan-java.git;a=commit;h=da3e0d06b467247643ce04e88d3346739d119f21": "pkg:generic/gitbox.apache.org/repos/asf/xalan-java@da3e0d06b467247643ce04e88d3346739d119f21",
311308
"https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm": "pkg:generic/git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm",
312309
"https://gitlab.gnome.org/GNOME/gimp": "pkg:generic/gitlab.gnome.org/GNOME/gimp",
313310
"https://gitlab.freedesktop.org/poppler/poppler": "pkg:generic/gitlab.freedesktop.org/poppler/poppler",
@@ -335,5 +332,6 @@
335332
"https://gcc.gnu.org/git/?p=gcc.git": "pkg:generic/gcc.gnu.org/git/gcc",
336333
"https://git.postgresql.org/gitweb/?p=hamn.git": "pkg:generic/git.postgresql.org/gitweb/hamn",
337334
"https://git.openssl.org/gitweb/?p=openssl.git": "pkg:generic/git.openssl.org/gitweb/openssl",
338-
"https://sourceware.org/git/?p=bunsen.git": "pkg:generic/sourceware.org/git/bunsen"
335+
"https://sourceware.org/git/?p=bunsen.git": "pkg:generic/sourceware.org/git/bunsen",
336+
"https://gitbox.apache.org/repos/asf?p=xalan-java.git": "pkg:generic/gitbox.apache.org/repos/asf/xalan-java"
339337
}

0 commit comments

Comments
 (0)