Skip to content

Commit abb9dc5

Browse files
committed
fix: Sanitize URLs to private repositories to not contain credentials
URLs to private repositories usually contain credentials. Signed-off-by: Nicolas Nobelis <nicolas.nobelis@bosch.com>
1 parent 2e122ee commit abb9dc5

1 file changed

Lines changed: 18 additions & 2 deletions

File tree

src/python_inspector/package_data.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,22 @@ async def get_pypi_data_from_purl(
9292
valid_distribution_urls.insert(0, wheel_url)
9393

9494
urls = {url.get("url"): url for url in response.get("urls") or []}
95+
96+
def remove_credentials_from_url(url: str):
    """
    Return ``url`` with any ``user:password@`` userinfo removed from its
    network location.

    Only the userinfo part is dropped; the host (including the brackets of
    an IPv6 literal and its original letter case), port, path, query and
    fragment are kept as they are. A URL that carries no credentials, or an
    empty/None value, is returned unchanged.
    """
    if not url:
        # Nothing to sanitize; pass empty values through untouched.
        return url

    parsed = urlparse(url)

    # The userinfo is everything before the LAST "@" of the netloc (a
    # password may itself contain "@"). Splitting the netloc text directly,
    # rather than rebuilding it from ``hostname`` and ``port``, keeps IPv6
    # brackets such as "[::1]" that ``hostname`` would strip.
    _, separator, host_and_port = parsed.netloc.rpartition("@")
    if not separator:
        # No credentials present: avoid a parse/unparse round trip that
        # could subtly rewrite an otherwise valid URL.
        return url

    return urlunparse(parsed._replace(netloc=host_and_port))
110+
95111
# iterate over the valid distribution urls and return the first
96112
# one that is matching.
97113
for dist_url in valid_distribution_urls:
@@ -105,12 +121,12 @@ async def get_pypi_data_from_purl(
105121
primary_language="Python",
106122
description=get_description(info),
107123
homepage_url=homepage_url,
108-
api_data_url=api_url,
124+
api_data_url=remove_credentials_from_url(api_url),
109125
bug_tracking_url=bug_tracking_url,
110126
code_view_url=code_view_url,
111127
license_expression=info.get("license_expression"),
112128
declared_license=get_declared_license(info),
113-
download_url=dist_url,
129+
download_url=remove_credentials_from_url(dist_url),
114130
size=url_data.get("size"),
115131
md5=digests.get("md5") or url_data.get("md5_digest"),
116132
sha256=digests.get("sha256"),

0 commit comments

Comments
 (0)