Skip to content

Commit c5e2dc5

Browse files
committed
Support custom Artifactory repositories for package metadata
When --index-url points to a custom Artifactory repository, dependency resolution works but the packages array comes back empty. This happens because get_pypi_data_from_purl() hardcodes https://pypi.org/pypi as the JSON API endpoint, so internal packages that do not exist on PyPI.org return 404 and are silently skipped. The fix derives the JSON API base URL from each provided repository (replacing /simple with /pypi in its index URL) and tries those endpoints in order, falling back to PyPI.org only when none of them responds. Distribution files are also now matched by filename (standardized per PEP 427/491) instead of by full URL, since URL paths can differ between the Simple API and JSON API endpoints. Signed-off-by: Kai Hodžić <hodzic.e.k@outlook.com>
1 parent b9df6f9 commit c5e2dc5

4 files changed

Lines changed: 247 additions & 7 deletions

File tree

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ v0.15.0
77
- Drop support for python3.9 and add support for python3.14
88
- Ensure that cached file is not empty before use https://github.com/aboutcode-org/python-inspector/pull/251
99
- Filter out empty values from install_requires https://github.com/aboutcode-org/python-inspector/pull/250
10+
- Support custom Artifactory repositories for package metadata by trying each
11+
repo's JSON API endpoint before falling back to PyPI.org
1012

1113
v0.14.4
1214
-----------

src/python_inspector/package_data.py

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
1111

12+
import posixpath
1213
from typing import Dict
1314
from typing import List
1415
from typing import Optional
16+
from urllib.parse import urlparse
1517

1618
from packageurl import PackageURL
1719

@@ -43,12 +45,26 @@ async def get_pypi_data_from_purl(
4345
version = parsed_purl.version
4446
if not version:
4547
raise Exception("Version is not specified in the purl")
46-
base_path = "https://pypi.org/pypi"
47-
api_url = f"{base_path}/{name}/{version}/json"
48+
49+
# Build list of JSON API URLs to try: each repo's /pypi endpoint, then PyPI.org as fallback.
50+
# For Artifactory, the /simple endpoint has a corresponding /pypi JSON API endpoint.
51+
api_urls = []
52+
for repo in repos:
53+
base_path = repo.index_url.replace("/simple", "/pypi")
54+
api_urls.append(f"{base_path}/{name}/{version}/json")
55+
api_urls.append(f"https://pypi.org/pypi/{name}/{version}/json")
4856

4957
from python_inspector.utils import get_response_async
5058

51-
response = await get_response_async(api_url)
59+
# Try each API URL until one succeeds
60+
response = None
61+
api_url = None
62+
for url in api_urls:
63+
response = await get_response_async(url)
64+
if response:
65+
api_url = url
66+
break
67+
5268
if not response:
5369
return None
5470

@@ -83,14 +99,22 @@ async def get_pypi_data_from_purl(
8399
if wheel_url:
84100
valid_distribution_urls.insert(0, wheel_url)
85101

86-
urls = {url.get("url"): url for url in response.get("urls") or []}
102+
# Index by filename for matching since distribution URLs from /simple may have
103+
# different paths than URLs from /pypi JSON API (especially with Artifactory)
104+
urls_by_filename = {}
105+
for url_entry in response.get("urls") or []:
106+
entry_url = url_entry.get("url")
107+
if entry_url:
108+
filename = posixpath.basename(urlparse(entry_url).path)
109+
urls_by_filename[filename] = url_entry
110+
87111
# iterate over the valid distribution urls and return the first
88112
# one that is matching.
89113
for dist_url in valid_distribution_urls:
90-
if dist_url not in urls:
114+
filename = posixpath.basename(urlparse(dist_url).path)
115+
url_data = urls_by_filename.get(filename)
116+
if not url_data:
91117
continue
92-
93-
url_data = urls.get(dist_url)
94118
digests = url_data.get("digests") or {}
95119

96120
return PackageData(
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
{
2+
"type": "pypi",
3+
"namespace": null,
4+
"name": "requests",
5+
"version": "2.28.0",
6+
"qualifiers": {},
7+
"subpath": null,
8+
"primary_language": "Python",
9+
"description": "",
10+
"release_date": "2022-06-29T15:30:00",
11+
"parties": [],
12+
"keywords": [],
13+
"homepage_url": "https://requests.readthedocs.io",
14+
"download_url": "https://repo.example.com/simple/../packages/ab/cd/requests-2.28.0-py3-none-any.whl",
15+
"size": 62500,
16+
"sha1": null,
17+
"md5": "789xyz",
18+
"sha256": "abc123def456",
19+
"sha512": null,
20+
"bug_tracking_url": null,
21+
"code_view_url": null,
22+
"vcs_url": null,
23+
"copyright": null,
24+
"license_expression": "Apache-2.0",
25+
"declared_license": {},
26+
"notice_text": null,
27+
"source_packages": [],
28+
"file_references": [],
29+
"extra_data": {},
30+
"dependencies": [],
31+
"repository_homepage_url": null,
32+
"repository_download_url": null,
33+
"api_data_url": "https://repo.example.com/pypi/requests/2.28.0/json",
34+
"datasource_id": null,
35+
"purl": "pkg:pypi/requests@2.28.0"
36+
}

tests/test_package_data.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# ScanCode is a trademark of nexB Inc.
6+
# SPDX-License-Identifier: Apache-2.0
7+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
8+
# See https://github.com/aboutcode-org/python-inspector for support or download.
9+
# See https://aboutcode.org for more information about nexB OSS projects.
10+
#
11+
12+
import os
13+
from unittest import mock
14+
15+
import pytest
16+
from commoncode.testcase import FileDrivenTesting
17+
from test_cli import check_data_results
18+
19+
from python_inspector.package_data import get_pypi_data_from_purl
20+
from python_inspector.utils_pypi import Environment
21+
from python_inspector.utils_pypi import PypiSimpleRepository
22+
23+
test_env = FileDrivenTesting()
24+
test_env.test_data_dir = os.path.join(os.path.dirname(__file__), "data")
25+
26+
27+
@pytest.mark.asyncio
28+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
29+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
30+
@mock.patch("python_inspector.utils.get_response_async")
31+
async def test_get_pypi_data_from_purl_tries_repos_in_order(
32+
mock_get_response, mock_get_wheels, mock_get_sdist
33+
):
34+
mock_get_sdist.return_value = None
35+
mock_get_wheels.return_value = []
36+
37+
call_urls = []
38+
39+
async def track_calls(url):
40+
call_urls.append(url)
41+
return None
42+
43+
mock_get_response.side_effect = track_calls
44+
45+
repo1 = PypiSimpleRepository(index_url="https://repo1.example.com/simple")
46+
repo2 = PypiSimpleRepository(index_url="https://repo2.example.com/simple")
47+
env = Environment(python_version="310", operating_system="linux")
48+
49+
await get_pypi_data_from_purl(
50+
purl="pkg:pypi/requests@2.28.0",
51+
environment=env,
52+
repos=[repo1, repo2],
53+
prefer_source=False,
54+
)
55+
56+
assert call_urls == [
57+
"https://repo1.example.com/pypi/requests/2.28.0/json",
58+
"https://repo2.example.com/pypi/requests/2.28.0/json",
59+
"https://pypi.org/pypi/requests/2.28.0/json",
60+
]
61+
62+
63+
@pytest.mark.asyncio
64+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
65+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
66+
@mock.patch("python_inspector.utils.get_response_async")
67+
async def test_get_pypi_data_from_purl_stops_on_first_success(
68+
mock_get_response, mock_get_wheels, mock_get_sdist
69+
):
70+
mock_get_sdist.return_value = None
71+
mock_get_wheels.return_value = []
72+
73+
call_urls = []
74+
75+
async def return_success_on_second(url):
76+
call_urls.append(url)
77+
if "repo2" in url:
78+
return {"info": {}, "urls": []}
79+
return None
80+
81+
mock_get_response.side_effect = return_success_on_second
82+
83+
repo1 = PypiSimpleRepository(index_url="https://repo1.example.com/simple")
84+
repo2 = PypiSimpleRepository(index_url="https://repo2.example.com/simple")
85+
env = Environment(python_version="310", operating_system="linux")
86+
87+
await get_pypi_data_from_purl(
88+
purl="pkg:pypi/requests@2.28.0",
89+
environment=env,
90+
repos=[repo1, repo2],
91+
prefer_source=False,
92+
)
93+
94+
assert call_urls == [
95+
"https://repo1.example.com/pypi/requests/2.28.0/json",
96+
"https://repo2.example.com/pypi/requests/2.28.0/json",
97+
]
98+
99+
100+
@pytest.mark.asyncio
101+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
102+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
103+
@mock.patch("python_inspector.utils.get_response_async")
104+
async def test_get_pypi_data_from_purl_falls_back_to_pypi_org(
105+
mock_get_response, mock_get_wheels, mock_get_sdist
106+
):
107+
mock_get_sdist.return_value = None
108+
mock_get_wheels.return_value = []
109+
110+
call_urls = []
111+
112+
async def track_calls(url):
113+
call_urls.append(url)
114+
return None
115+
116+
mock_get_response.side_effect = track_calls
117+
118+
env = Environment(python_version="310", operating_system="linux")
119+
120+
await get_pypi_data_from_purl(
121+
purl="pkg:pypi/requests@2.28.0",
122+
environment=env,
123+
repos=[],
124+
prefer_source=False,
125+
)
126+
127+
assert call_urls == ["https://pypi.org/pypi/requests/2.28.0/json"]
128+
129+
130+
@pytest.mark.asyncio
131+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
132+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
133+
@mock.patch("python_inspector.utils.get_response_async")
134+
async def test_get_pypi_data_from_purl_matches_by_filename(
135+
mock_get_response, mock_get_wheels, mock_get_sdist
136+
):
137+
mock_get_sdist.return_value = None
138+
mock_get_wheels.return_value = [
139+
"https://repo.example.com/simple/../packages/ab/cd/requests-2.28.0-py3-none-any.whl"
140+
]
141+
142+
async def return_json_response(url):
143+
if "pypi" in url:
144+
return {
145+
"info": {
146+
"name": "requests",
147+
"version": "2.28.0",
148+
"home_page": "https://requests.readthedocs.io",
149+
"license_expression": "Apache-2.0",
150+
},
151+
"urls": [
152+
{
153+
"url": "../../packages/xy/zz/requests-2.28.0-py3-none-any.whl",
154+
"digests": {"sha256": "abc123def456", "md5": "789xyz"},
155+
"size": 62500,
156+
"upload_time": "2022-06-29T15:30:00",
157+
}
158+
],
159+
}
160+
return None
161+
162+
mock_get_response.side_effect = return_json_response
163+
164+
repo = PypiSimpleRepository(index_url="https://repo.example.com/simple")
165+
env = Environment(python_version="310", operating_system="linux")
166+
167+
result = await get_pypi_data_from_purl(
168+
purl="pkg:pypi/requests@2.28.0",
169+
environment=env,
170+
repos=[repo],
171+
prefer_source=False,
172+
)
173+
174+
expected_file = test_env.get_test_loc(
175+
"test_get_pypi_data_from_purl_matches_by_filename-expected.json",
176+
must_exist=False,
177+
)
178+
check_data_results(results=result.to_dict(), expected_file=expected_file)

0 commit comments

Comments
 (0)