Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/sphinx/source/whatsnew/v0.15.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ Bug fixes
introduced in v0.15.1 (:pull:`2702`) that caused a broadcasting
``ValueError`` when ``tracker_theta`` was a 2-D (or higher rank) array.
(:issue:`2747`, :pull:`2749`)
* :py:func:`pvlib.iotools.read_nsrdb_psm4` now parses the file header with the
:py:mod:`csv` module instead of a naive ``str.split(',')``, so quoted column
names containing commas (e.g. the material names in spectral-on-demand files)
are no longer split into spurious columns. (:issue:`2736`, :pull:`2771`)

Enhancements
~~~~~~~~~~~~
Expand Down Expand Up @@ -63,6 +67,7 @@ Maintenance
Contributors
~~~~~~~~~~~~
* :ghuser:`Omesh37`
* :ghuser:`gaoflow`
* Cliff Hansen (:ghuser:`cwhanse`)
* :ghuser:`shethkajal7`
* Arthur Onno (:ghuser:`ArthurOnnoTerabase`)
Expand Down
12 changes: 9 additions & 3 deletions pvlib/iotools/psm4.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
https://developer.nlr.gov/docs/solar/nsrdb/nsrdb-GOES-full-disc-v4-0-0-download/
"""

import csv
import io
from urllib.parse import urljoin
import requests
Expand Down Expand Up @@ -723,11 +724,16 @@ def read_nsrdb_psm4(filename, map_variables=True):
<https://web.archive.org/web/20170207203107/https://sam.nrel.gov/sites/default/files/content/documents/pdf/wfcsv.pdf>`_
"""
with tools._file_context_manager(filename) as fbuf:
# The first 3 header lines are parsed with the csv module rather than a
# naive str.split(',') so that quoted fields containing commas are kept
# intact. Spectral-on-demand files, for instance, have column names
# like '"GaAs (Bauhuis et al., 2009)"' whose embedded commas would
# otherwise be split into spurious columns (see GH #2736).
# The first 2 lines of the response are headers with metadata
metadata_fields = fbuf.readline().split(',')
metadata_values = fbuf.readline().split(',')
metadata_fields = next(csv.reader([fbuf.readline()]))
metadata_values = next(csv.reader([fbuf.readline()]))
# get the column names so we can set the dtypes
columns = fbuf.readline().split(',')
columns = next(csv.reader([fbuf.readline()]))
columns[-1] = columns[-1].strip() # strip trailing newline
# Since the header has so many columns, excel saves blank cols in the
# data below the header lines.
Expand Down
25 changes: 25 additions & 0 deletions tests/iotools/test_psm4.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,31 @@ def test_read_nsrdb_psm4_map_variables():
assert_index_equal(data.columns, pd.Index(columns_mapped))


def test_read_nsrdb_psm4_quoted_columns_with_commas():
    """Quoted column names containing commas are kept whole (GH #2736).

    Spectral-on-demand files carry material names with embedded commas in
    the column header (the 3rd line of the file). That is valid CSV, but a
    naive ``str.split(',')`` would break such names into extra columns and
    raise on read.
    """
    # Minimal NSRDB file: two metadata lines, the column header, two records.
    metadata_names = (
        "Source,Location ID,City,State,Country,Latitude,Longitude,Time Zone,"
        "Elevation,Local Time Zone,Version\n"
    )
    metadata_row = "NSRDB,1,-,-,-,40.0,-105.0,-7,1600,-7,4.0.1\n"
    column_header = (
        'Year,Month,Day,Hour,Minute,GHI,"GaAs (Bauhuis et al., 2009)",'
        '"InGaP (Gray, 2008)"\n'
    )
    records = (
        "2023,1,1,0,0,0,0.1,0.2\n"
        "2023,1,1,1,0,5,0.3,0.4\n"
    )
    buffer = StringIO(
        metadata_names + metadata_row + column_header + records
    )

    data, metadata = psm4.read_nsrdb_psm4(buffer, map_variables=False)

    expected_columns = [
        'Year', 'Month', 'Day', 'Hour', 'Minute', 'GHI',
        'GaAs (Bauhuis et al., 2009)', 'InGaP (Gray, 2008)',
    ]
    assert list(data.columns) == expected_columns
    assert data.shape == (2, 8)
    # the embedded-comma data columns round-trip as floats
    assert data['GaAs (Bauhuis et al., 2009)'].tolist() == [0.1, 0.3]


@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_nsrdb_psm4_aggregated_parameter_mapping(nlr_api_key):
Expand Down