Skip to content

Commit 3b42137

Browse files
committed
Support custom Artifactory repositories for package metadata
When using --index-url with a custom Artifactory repository, dependency resolution works but the packages array comes back empty. This happens because get_pypi_data_from_purl() hardcodes https://pypi.org/pypi for the JSON API endpoint. Internal packages that don't exist on PyPI.org return 404 and are silently skipped. The fix derives the JSON API base URL from the provided repository instead of hardcoding PyPI.org. It is also necessary to match distribution files by filename (standardized per PEP 427/491) instead of by full URL, since URL paths can differ between Simple API and JSON API endpoints. Signed-off-by: Kai Hodžić <hodzic.e.k@outlook.com>
1 parent b9df6f9 commit 3b42137

3 files changed

Lines changed: 229 additions & 32 deletions

File tree

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
Changelog
22
=========
33

4+
v0.15.1
5+
-----------
6+
7+
- Fix package metadata fetch for Artifactory repositories
8+
49
v0.15.0
510
-----------
611

src/python_inspector/package_data.py

Lines changed: 53 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@
99
# See https://aboutcode.org for more information about nexB OSS projects.
1010
#
1111

12+
import posixpath
1213
from typing import Dict
1314
from typing import List
1415
from typing import Optional
16+
from urllib.parse import urljoin
17+
from urllib.parse import urlparse
1518

1619
from packageurl import PackageURL
1720

@@ -30,67 +33,85 @@ async def get_pypi_data_from_purl(
3033
purl: str, environment: Environment, repos: List[PypiSimpleRepository], prefer_source: bool
3134
) -> Optional[PackageData]:
3235
"""
33-
Generate `Package` object from the `purl` string of pypi type
36+
Return a PackageData object from the ``purl`` string of pypi type.
3437
35-
``purl`` is a package-url of pypi type
36-
``environment`` is a `Environment` object defaulting Python version 3.8 and linux OS
37-
``repos`` is a list of `PypiSimpleRepository` objects
38-
``prefer_source`` is a boolean value to prefer source distribution over wheel,
39-
if no source distribution is available then wheel is used
38+
``environment`` is an Environment object with Python version and OS.
39+
``repos`` is a list of PypiSimpleRepository objects to try in order.
40+
``prefer_source`` prefers source distribution over wheel when True.
4041
"""
42+
from python_inspector.utils import get_response_async
43+
4144
parsed_purl = PackageURL.from_string(purl)
4245
name = parsed_purl.name
4346
version = parsed_purl.version
4447
if not version:
4548
raise Exception("Version is not specified in the purl")
46-
base_path = "https://pypi.org/pypi"
47-
api_url = f"{base_path}/{name}/{version}/json"
48-
49-
from python_inspector.utils import get_response_async
50-
51-
response = await get_response_async(api_url)
52-
if not response:
53-
return None
5449

55-
info = response.get("info") or {}
56-
homepage_url = info.get("home_page")
57-
project_urls = info.get("project_urls") or {}
58-
code_view_url = get_pypi_codeview_url(project_urls)
59-
bug_tracking_url = get_pypi_bugtracker_url(project_urls)
6050
python_version = get_python_version_from_env_tag(python_version=environment.python_version)
51+
52+
# Collect distribution URLs from repos
6153
valid_distribution_urls = []
6254
sdist_url = await get_sdist_download_url(
6355
purl=parsed_purl, repos=repos, python_version=python_version
6456
)
6557
if sdist_url:
6658
valid_distribution_urls.append(sdist_url)
6759

68-
valid_distribution_urls = [url for url in valid_distribution_urls if url]
69-
70-
# if prefer_source is True then only source distribution is used
71-
# in case of no source distribution available then wheel is used
7260
if not valid_distribution_urls or not prefer_source:
73-
wheel_urls = [
74-
item
75-
for item in await get_wheel_download_urls(
61+
wheel_urls = list(
62+
await get_wheel_download_urls(
7663
purl=parsed_purl,
7764
repos=repos,
7865
environment=environment,
7966
python_version=python_version,
8067
)
81-
]
68+
)
8269
wheel_url = choose_single_wheel(wheel_urls)
8370
if wheel_url:
8471
valid_distribution_urls.insert(0, wheel_url)
8572

86-
urls = {url.get("url"): url for url in response.get("urls") or []}
87-
# iterate over the valid distribution urls and return the first
88-
# one that is matching.
73+
# Build list of JSON API URLs to try: each repo's /pypi endpoint, then PyPI.org as fallback
74+
api_urls = []
75+
for repo in repos:
76+
# Convert /simple to /pypi for Artifactory-style JSON API
77+
base_path = repo.index_url.replace("/simple", "/pypi")
78+
api_urls.append(f"{base_path}/{name}/{version}/json")
79+
api_urls.append(f"https://pypi.org/pypi/{name}/{version}/json")
80+
81+
# Try each API URL until one succeeds
82+
response = None
83+
api_url = None
84+
for url in api_urls:
85+
response = await get_response_async(url)
86+
if response:
87+
api_url = url
88+
break
89+
90+
if not response:
91+
return None
92+
93+
info = response.get("info") or {}
94+
homepage_url = info.get("home_page")
95+
project_urls = info.get("project_urls") or {}
96+
code_view_url = get_pypi_codeview_url(project_urls)
97+
bug_tracking_url = get_pypi_bugtracker_url(project_urls)
98+
99+
# Index by filename for matching (paths differ between /simple and /pypi endpoints)
100+
urls_by_filename = {}
101+
for url_entry in response.get("urls") or []:
102+
url = url_entry.get("url")
103+
if url:
104+
absolute_url = urljoin(api_url, url)
105+
filename = posixpath.basename(urlparse(absolute_url).path)
106+
urls_by_filename[filename] = url_entry
107+
108+
# Match distribution URLs by filename
89109
for dist_url in valid_distribution_urls:
90-
if dist_url not in urls:
110+
filename = posixpath.basename(urlparse(dist_url).path)
111+
url_data = urls_by_filename.get(filename)
112+
if not url_data:
91113
continue
92114

93-
url_data = urls.get(dist_url)
94115
digests = url_data.get("digests") or {}
95116

96117
return PackageData(

tests/test_package_data.py

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
#
4+
# Copyright (c) nexB Inc. and others. All rights reserved.
5+
# ScanCode is a trademark of nexB Inc.
6+
# SPDX-License-Identifier: Apache-2.0
7+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
8+
# See https://github.com/aboutcode-org/python-inspector for support or download.
9+
# See https://aboutcode.org for more information about nexB OSS projects.
10+
#
11+
12+
import json
13+
import os
14+
from unittest import mock
15+
16+
import pytest
17+
from commoncode.testcase import FileDrivenTesting
18+
19+
from python_inspector.package_data import get_pypi_data_from_purl
20+
from python_inspector.utils_pypi import Environment
21+
from python_inspector.utils_pypi import PypiSimpleRepository
22+
23+
test_env = FileDrivenTesting()
24+
test_env.test_data_dir = os.path.join(os.path.dirname(__file__), "data")
25+
26+
27+
@pytest.mark.asyncio
28+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
29+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
30+
@mock.patch("python_inspector.utils.get_response_async")
31+
async def test_get_pypi_data_from_purl_tries_repos_in_order(
32+
mock_get_response, mock_get_wheels, mock_get_sdist
33+
):
34+
mock_get_sdist.return_value = None
35+
mock_get_wheels.return_value = []
36+
37+
call_urls = []
38+
39+
async def track_calls(url):
40+
call_urls.append(url)
41+
return None
42+
43+
mock_get_response.side_effect = track_calls
44+
45+
repo1 = PypiSimpleRepository(index_url="https://repo1.example.com/simple")
46+
repo2 = PypiSimpleRepository(index_url="https://repo2.example.com/simple")
47+
env = Environment(python_version="310", operating_system="linux")
48+
49+
await get_pypi_data_from_purl(
50+
purl="pkg:pypi/requests@2.28.0",
51+
environment=env,
52+
repos=[repo1, repo2],
53+
prefer_source=False,
54+
)
55+
56+
assert call_urls == [
57+
"https://repo1.example.com/pypi/requests/2.28.0/json",
58+
"https://repo2.example.com/pypi/requests/2.28.0/json",
59+
"https://pypi.org/pypi/requests/2.28.0/json",
60+
]
61+
62+
63+
@pytest.mark.asyncio
64+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
65+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
66+
@mock.patch("python_inspector.utils.get_response_async")
67+
async def test_get_pypi_data_from_purl_stops_on_first_success(
68+
mock_get_response, mock_get_wheels, mock_get_sdist
69+
):
70+
mock_get_sdist.return_value = None
71+
mock_get_wheels.return_value = []
72+
73+
call_urls = []
74+
75+
async def return_success_on_second(url):
76+
call_urls.append(url)
77+
if "repo2" in url:
78+
return {"info": {}, "urls": []}
79+
return None
80+
81+
mock_get_response.side_effect = return_success_on_second
82+
83+
repo1 = PypiSimpleRepository(index_url="https://repo1.example.com/simple")
84+
repo2 = PypiSimpleRepository(index_url="https://repo2.example.com/simple")
85+
env = Environment(python_version="310", operating_system="linux")
86+
87+
await get_pypi_data_from_purl(
88+
purl="pkg:pypi/requests@2.28.0",
89+
environment=env,
90+
repos=[repo1, repo2],
91+
prefer_source=False,
92+
)
93+
94+
assert call_urls == [
95+
"https://repo1.example.com/pypi/requests/2.28.0/json",
96+
"https://repo2.example.com/pypi/requests/2.28.0/json",
97+
]
98+
99+
100+
@pytest.mark.asyncio
101+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
102+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
103+
@mock.patch("python_inspector.utils.get_response_async")
104+
async def test_get_pypi_data_from_purl_falls_back_to_pypi_org(
105+
mock_get_response, mock_get_wheels, mock_get_sdist
106+
):
107+
mock_get_sdist.return_value = None
108+
mock_get_wheels.return_value = []
109+
110+
call_urls = []
111+
112+
async def track_calls(url):
113+
call_urls.append(url)
114+
return None
115+
116+
mock_get_response.side_effect = track_calls
117+
118+
env = Environment(python_version="310", operating_system="linux")
119+
120+
await get_pypi_data_from_purl(
121+
purl="pkg:pypi/requests@2.28.0",
122+
environment=env,
123+
repos=[],
124+
prefer_source=False,
125+
)
126+
127+
assert call_urls == ["https://pypi.org/pypi/requests/2.28.0/json"]
128+
129+
130+
@pytest.mark.asyncio
131+
@mock.patch("python_inspector.package_data.get_sdist_download_url")
132+
@mock.patch("python_inspector.package_data.get_wheel_download_urls")
133+
@mock.patch("python_inspector.utils.get_response_async")
134+
async def test_get_pypi_data_from_purl_matches_by_filename(
135+
mock_get_response, mock_get_wheels, mock_get_sdist
136+
):
137+
mock_get_sdist.return_value = None
138+
mock_get_wheels.return_value = [
139+
"https://repo.example.com/simple/../packages/ab/cd/requests-2.28.0-py3-none-any.whl"
140+
]
141+
142+
async def return_json_response(url):
143+
if "pypi" in url:
144+
return {
145+
"info": {"name": "requests", "version": "2.28.0"},
146+
"urls": [
147+
{
148+
"url": "../../packages/xy/zz/requests-2.28.0-py3-none-any.whl",
149+
"digests": {"sha256": "abc123", "md5": "def456"},
150+
"size": 12345,
151+
}
152+
],
153+
}
154+
return None
155+
156+
mock_get_response.side_effect = return_json_response
157+
158+
repo = PypiSimpleRepository(index_url="https://repo.example.com/simple")
159+
env = Environment(python_version="310", operating_system="linux")
160+
161+
result = await get_pypi_data_from_purl(
162+
purl="pkg:pypi/requests@2.28.0",
163+
environment=env,
164+
repos=[repo],
165+
prefer_source=False,
166+
)
167+
168+
assert result is not None
169+
assert result.sha256 == "abc123"
170+
assert result.md5 == "def456"
171+
assert result.size == 12345

0 commit comments

Comments
 (0)