@@ -6692,16 +6692,197 @@ The content of the new file is:
66926692 4, 0, 5 ;
66936693 }
66946694
6695- ----
6696-
66976695.. _Coordinate-subampling:
66986696
66996697Coordinate subsampling
67006698^^^^^^^^^^^^^^^^^^^^^^
67016699
67026700`Lossy compression by coordinate subsampling`_ was introduced into the
6703- CF conventions at CF-1.9, but is not yet available in cfdm. It will be
6704- ready in a future 3.x.0 release.
6701+ CF conventions at CF-1.9 for applications for which the coordinates
6702+ can require considerably more storage than the data itself. Space may
6703+ be saved in the netCDF file by storing a subsample of the coordinates
6704+ that describe the data, and the uncompressed coordinate and auxiliary
6705+ coordinate variables are reconstituted by interpolation, from the
6706+ subsampled coordinate values to the domain of the data.
6707+
6708+ This is illustrated with the file ``subsampled.nc`` (found in the
6709+ :ref:`sample datasets <Sample-datasets>`):
6710+
6711+
6712+ .. code-block:: console
6713+ :caption: *Inspect the compressed dataset with the ncdump command
6714+ line tool.*
6715+
6716+ $ ncdump -h subsampled.nc
6717+ netcdf subsampled {
6718+ dimensions:
6719+ time = 2 ;
6720+ lat = 18 ;
6721+ lon = 12 ;
6722+ tp_lat = 4 ;
6723+ tp_lon = 5 ;
6724+ variables:
6725+ float time(time) ;
6726+ time:standard_name = "time" ;
6727+ time:units = "days since 2000-01-01" ;
6728+ float lat(tp_lat, tp_lon) ;
6729+ lat:standard_name = "latitude" ;
6730+ lat:units = "degrees_north" ;
6731+ lat:bounds_tie_points = "lat_bounds" ;
6732+ float lon(tp_lat, tp_lon) ;
6733+ lon:standard_name = "longitude" ;
6734+ lon:units = "degrees_east" ;
6735+ lon:bounds_tie_points = "lon_bounds" ;
6736+ float lat_bounds(tp_lat, tp_lon) ;
6737+ float lon_bounds(tp_lat, tp_lon) ;
6738+ int lat_indices(tp_lat) ;
6739+ lat_indices:long_name = "Tie point indices for latitude dimension" ;
6740+ int lon_indices(tp_lon) ;
6741+ lon_indices:long_name = "Tie point indices for longitude dimension" ;
6742+ int bilinear ;
6743+ bilinear:interpolation_name = "bi_linear" ;
6744+ bilinear:computational_precision = "64" ;
6745+ bilinear:tie_point_mapping =
6746+ "lat: lat_indices tp_lat lon: lon_indices tp_lon" ;
6747+ float q(time, lat, lon) ;
6748+ q:standard_name = "specific_humidity" ;
6749+ q:units = "1" ;
6750+ q:coordinate_interpolation = "lat: lon: bilinear" ;
6751+
6752+ // global attributes:
6753+ :Conventions = "CF-1.11" ;
6754+ }
6755+
6756+
6757+ Reading and inspecting this file shows the latitude and longitude
6758+ coordinates in uncompressed form, whilst their underlying arrays are
6759+ still in the subsampled representation described in the file:
6760+
6761+ .. code-block:: python
6762+ :caption: *Read a field construct from a dataset that has been
6763+ compressed by coordinate subsampling, and inspect
6764+ coordinates.*
6765+
6766+ >>> f = cf.read('subsampled.nc')[0]
6767+ >>> print(f)
6768+ Field: specific_humidity (ncvar%q)
6769+ ----------------------------------
6770+ Data : specific_humidity(time(2), ncdim%lat(18), ncdim%lon(12)) 1
6771+ Dimension coords: time(2) = [2000-01-01 00:00:00, 2000-02-01 00:00:00]
6772+ Auxiliary coords: latitude(ncdim%lat(18), ncdim%lon(12)) = [[-85.0, ..., 85.0]] degrees_north
6773+ : longitude(ncdim%lat(18), ncdim%lon(12)) = [[15.0, ..., 345.0]] degrees_east
6774+ >>> lon = f.construct('longitude')
6775+ >>> lon
6776+ <AuxiliaryCoordinate: longitude(18, 12) degrees_east>
6777+ >>> lon.data.source()
6778+ <SubsampledArray(18, 12): >
6779+ >>> print(lon.array)
6780+ [[15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6781+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6782+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6783+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6784+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6785+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6786+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6787+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6788+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6789+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6790+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6791+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6792+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6793+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6794+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6795+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6796+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]
6797+ [15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]]
6798+ >>> lon.data.source().source()
6799+ <Data(4, 5): [[15.0, ..., 345.0]]>
6800+ >>> print(lon.data.source().source().array)
6801+ [[ 15. 135. 225. 255. 345.]
6802+ [ 15. 135. 225. 255. 345.]
6803+ [ 15. 135. 225. 255. 345.]
6804+ [ 15. 135. 225. 255. 345.]]
6805+
6806+ As with all other forms of compression, the field may be treated as if
6807+ it were not compressed:
6808+
6809+ .. code-block:: python
6810+ :caption: *Get subspaces based on indices of the uncompressed
6811+ data.*
6812+
6813+ >>> g = f[0, 6, :]
6814+ >>> print(g)
6815+ Field: specific_humidity (ncvar%q)
6816+ ----------------------------------
6817+ Data : specific_humidity(time(1), ncdim%lat(1), ncdim%lon(12)) 1
6818+ Dimension coords: time(1) = [2000-01-01 00:00:00]
6819+ Auxiliary coords: latitude(ncdim%lat(1), ncdim%lon(12)) = [[-25.0, ..., -25.0]] degrees_north
6820+ : longitude(ncdim%lat(1), ncdim%lon(12)) = [[15.0, ..., 345.0]] degrees_east
6821+ >>> print(g.construct('longitude').array)
6822+ [[15.0 45.0 75.0 105.0 135.0 165.0 195.0 225.0 255.0 285.0 315.0 345.0]]
6823+
6824+
6825+ The metadata that define the subsampling are contained within the
6826+ coordinate's `Data` object:
6827+
6828+ .. code-block:: python
6829+ :caption: *Inspect the tie point indices and computational
6830+ precision that define the subsampling.*
6831+
6832+ >>> lon = f.construct('longitude')
6833+ >>> d = lon.data.source()
6834+ >>> d.get_tie_point_indices()
6835+ {0: <TiePointIndex: long_name=Tie point indices for latitude dimension(4) >,
6836+ 1: <TiePointIndex: long_name=Tie point indices for longitude dimension(5) >}
6837+ >>> d.get_computational_precision()
6838+ '64'
6839+
6840+ It is not yet, as of version 1.10.0.0, possible to write to disk a
6841+ field construct with compression by coordinate subsampling.
6842+
6843+ .. _Lossy-compression-via-quantization:
6844+
6845+ Lossy compression via quantization
6846+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6847+
6848+ `Lossy compression via quantization`_ eliminates false precision,
6849+ usually by rounding the least significant bits of floating-point
6850+ mantissas to zeros, so that a subsequent compression on disk is more
6851+ efficient. Quantization is described by the following parameters:
6852+
6853+ * The ``algorithm`` parameter names a specific quantization algorithm.
6854+
6855+ * The ``implementation`` parameter contains unstandardised text that
6856+ concisely conveys the algorithm provenance including the name of the
6857+ library or client that performed the quantization, the software
6858+ version, and any other information required to disambiguate the
6859+ source of the algorithm employed. The text must take the form
6860+ ``software-name version version-string [(optional-information)]``.
6861+
6862+ * The retained precision of the algorithm is defined with either the
6863+ ``quantization_nsb`` or ``quantization_nsd`` parameter.
6864+
6865+ If quantization has been applied to the data, then it may be described
6866+ within a `Quantization` object, accessed via the construct's
6867+ `!get_quantization` method. To apply quantization at the time of
6868+ writing the data to disk, use the construct's `!set_quantize_on_write`
6869+ method:
6870+
6871+ .. code-block:: python
6872+ :caption: *Lossy compression via quantization.*
6873+
6874+ >>> q, t = cf.read('file.nc')
6875+ >>> t.set_quantize_on_write(algorithm='bitgroom', quantization_nsd=6)
6876+ >>> cf.write(t, 'quantized.nc')
6877+ >>> quantized = cf.read('quantized.nc')[0]
6878+ >>> c = quantized.get_quantization()
6879+ >>> c
6880+ <CF Quantization: _QuantizeBitGroomNumberOfSignificantDigits=6, algorithm=bitgroom, implementation=libnetcdf version 4.9.4-development, quantization_nsd=6>
6881+ >>> c.parameters()
6882+ {'algorithm': 'bitgroom',
6883+ 'implementation': 'libnetcdf version 4.9.4-development',
6884+ '_QuantizeBitGroomNumberOfSignificantDigits': np.int32(6),
6885+ 'quantization_nsd': np.int64(6)}
67056886
67066887----
67076888
0 commit comments