From 099f7a11226ed82514764673c5ab2f72afaccd1e Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Wed, 1 Jul 2026 15:42:56 -0400 Subject: [PATCH 1/2] Make transfer/primaries conversion opt-in in reformat, fixes #2208 sws_scale_frame (used since 17.0.0) validates color_trc/color_primaries and rejects RESERVED and other unsupported values (e.g. LOG) with EOPNOTSUPP, regressing plain reformat/to_ndarray to rgb24 on VP9 and NVDEC frames. The pre-17.0 sws_scale ignored these fields. Neutralize color_trc/color_primaries to UNSPECIFIED for the scale unless a destination value is explicitly requested, while preserving the source's tags on the returned frame. The YUV->RGB matrix and explicit conversions are unaffected. --- CHANGELOG.rst | 1 + av/video/reformatter.py | 57 +++++++++++++++++++++++++++++++++------- tests/test_colorspace.py | 47 +++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 25fd9e9ac..3152c1f56 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -46,6 +46,7 @@ Features: Fixes: +- Fix ``VideoFrame.reformat`` (and ``to_ndarray``/``to_rgb``/``to_image``) raising ``OSError`` ``Operation not supported`` on frames tagged with reserved or otherwise unsupported ``color_primaries``/``color_trc`` values (e.g. VP9 and NVDEC output); a transfer/primaries conversion is now only performed when explicitly requested by :gh-user:`WyattBlue` (:issue:`2208`). - Fix ``add_mux_stream`` producing unwritable Matroska files by extracting codec extradata from the bitstream before the header is written by :gh-user:`WyattBlue` (:issue:`2198`). - Encode GPU frames (e.g. CUDA frames from DLPack) directly with ``pix_fmt="cuda"`` by adopting the frame's ``hw_frames_ctx`` before opening the encoder by :gh-user:`WyattBlue` (:issue:`2199`). diff --git a/av/video/reformatter.py b/av/video/reformatter.py index e658ff6b4..59725571b 100644 --- a/av/video/reformatter.py +++ b/av/video/reformatter.py @@ -224,11 +224,14 @@ def reformat( ) c_src_color_range = _resolve_enum_value(src_color_range, ColorRange, 0) c_dst_color_range = _resolve_enum_value(dst_color_range, ColorRange, 0) + # Default to UNSPECIFIED (not the source's value) so that a transfer / + # primaries conversion is only performed when explicitly requested. See + # _reformat for why. c_dst_color_trc = _resolve_enum_value( - dst_color_trc, ColorTrc, frame.ptr.color_trc + dst_color_trc, ColorTrc, lib.AVCOL_TRC_UNSPECIFIED ) c_dst_color_primaries = _resolve_enum_value( - dst_color_primaries, ColorPrimaries, frame.ptr.color_primaries + dst_color_primaries, ColorPrimaries, lib.AVCOL_PRI_UNSPECIFIED ) c_threads: cython.int = threads if threads is not None else 0 c_width: cython.int = width if width is not None else frame.ptr.width @@ -277,12 +280,37 @@ def _reformat( new_frame.ptr.format = dst_format new_frame.ptr.width = width new_frame.ptr.height = height - new_frame.ptr.color_trc = cython.cast( - lib.AVColorTransferCharacteristic, dst_color_trc - ) - new_frame.ptr.color_primaries = cython.cast( - lib.AVColorPrimaries, dst_color_primaries + + # A transfer-characteristic / primaries conversion is opt-in. Unlike the + # pre-17.0 sws_scale, sws_scale_frame inspects color_trc/color_primaries + # and rejects RESERVED (and other unsupported) values with EOPNOTSUPP, + # which regressed plain reformats of e.g. VP9 / NVDEC frames (#2208). So + # only feed these fields to swscale when the caller explicitly requested a + # destination value; otherwise neutralize them for the scale (as the old + # sws_scale effectively did) while still preserving the source's tags on + # the returned frame's metadata. + convert_trc: cython.bint = dst_color_trc != lib.AVCOL_TRC_UNSPECIFIED + convert_primaries: cython.bint = ( + dst_color_primaries != lib.AVCOL_PRI_UNSPECIFIED ) + frame_src_color_trc: lib.AVColorTransferCharacteristic = frame.ptr.color_trc + frame_src_color_primaries: lib.AVColorPrimaries = frame.ptr.color_primaries + + if convert_trc: + new_frame.ptr.color_trc = cython.cast( + lib.AVColorTransferCharacteristic, dst_color_trc + ) + else: + frame.ptr.color_trc = lib.AVCOL_TRC_UNSPECIFIED + new_frame.ptr.color_trc = lib.AVCOL_TRC_UNSPECIFIED + + if convert_primaries: + new_frame.ptr.color_primaries = cython.cast( + lib.AVColorPrimaries, dst_color_primaries + ) + else: + frame.ptr.color_primaries = lib.AVCOL_PRI_UNSPECIFIED + new_frame.ptr.color_primaries = lib.AVCOL_PRI_UNSPECIFIED # Translate source and destination colorspace/range from SWS_CS_* to AVCOL_* # so sws_is_noop and sws_scale_frame understand them @@ -294,9 +322,11 @@ def _reformat( # Shortcut if sws_scale_frame would be a no-op is_noop: cython.bint = sws_is_noop(new_frame.ptr, frame.ptr) != 0 if is_noop: - # Restore source frame colorspace/range to avoid side effects + # Restore source frame metadata to avoid side effects frame.ptr.colorspace = frame_src_colorspace frame.ptr.color_range = frame_src_color_range + frame.ptr.color_trc = frame_src_color_trc + frame.ptr.color_primaries = frame_src_color_primaries return frame if self.ptr == cython.NULL: @@ -311,9 +341,18 @@ def _reformat( with cython.nogil: ret = sws_scale_frame(self.ptr, new_frame.ptr, frame.ptr) - # Restore source frame colorspace/range to avoid side effects + # Restore source frame metadata to avoid side effects frame.ptr.colorspace = frame_src_colorspace frame.ptr.color_range = frame_src_color_range + frame.ptr.color_trc = frame_src_color_trc + frame.ptr.color_primaries = frame_src_color_primaries + + # Preserve the source's transfer/primaries on the output when no explicit + # conversion was requested (the scale ran with neutralized tags). + if not convert_trc: + new_frame.ptr.color_trc = frame_src_color_trc + if not convert_primaries: + new_frame.ptr.color_primaries = frame_src_color_primaries err_check(ret) diff --git a/tests/test_colorspace.py b/tests/test_colorspace.py index 6afd42cf0..1dec93e20 100644 --- a/tests/test_colorspace.py +++ b/tests/test_colorspace.py @@ -121,3 +121,50 @@ def test_reformat_dst_colorspace_metadata( frame = av.VideoFrame(width=64, height=64, format="yuv420p") rgb = frame.reformat(format="rgb24", dst_colorspace=colorspace) assert rgb.colorspace == expected + + +# RESERVED0 (0) and RESERVED (3) primaries/transfer values, plus a couple of +# transfer functions swscale can't handle (LOG / LOG_SQRT). Real VP9 and NVDEC +# streams routinely tag frames with these. sws_scale_frame (used since 17.0.0) +# validates these fields and rejects them with EOPNOTSUPP, which regressed a +# plain reformat/to_ndarray to "rgb24" (#2208). The pre-17.0 sws_scale ignored +# them, and a transfer/primaries conversion should stay opt-in. +@pytest.mark.parametrize( + ("color_primaries", "color_trc"), + [ + (3, 3), # RESERVED / RESERVED + (0, 0), # RESERVED0 / RESERVED0 + (3, 2), # reserved primaries only + (2, 3), # reserved transfer only + (2, 9), # AVCOL_TRC_LOG (unsupported by swscale) + (2, 10), # AVCOL_TRC_LOG_SQRT (unsupported by swscale) + ], +) +def test_reformat_unsupported_color_metadata( + color_primaries: int, color_trc: int +) -> None: + frame = av.VideoFrame(width=64, height=64, format="yuv420p") + frame.colorspace = Colorspace.ITU709 + frame.color_primaries = color_primaries + frame.color_trc = color_trc + + # Neither of these should raise OSError(EOPNOTSUPP). + rgb = frame.reformat(format="rgb24") + assert rgb.format.name == "rgb24" + array = frame.to_ndarray(format="rgb24") + assert array.shape == (64, 64, 3) + + # The reformat must not mutate the source frame's metadata. + assert frame.color_primaries == color_primaries + assert frame.color_trc == color_trc + + # The BT.709 matrix is still applied even though the transfer/primaries are + # unsupported: a neutral gray must stay gray. + gray = av.VideoFrame(width=64, height=64, format="yuv420p") + gray.colorspace = Colorspace.ITU709 + gray.color_primaries = color_primaries + gray.color_trc = color_trc + for plane, value in zip(gray.planes, (128, 128, 128)): + plane.update(bytes([value]) * plane.buffer_size) + out = gray.to_ndarray(format="rgb24") + assert out.min() == out.max() == out[0, 0, 0] From 4d00cc890827d7e189fec7973a4efc1f2ab72270 Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Wed, 1 Jul 2026 16:47:31 -0400 Subject: [PATCH 2/2] Accept Codec or codec name as BitStreamFilterContext in_stream in_stream now takes a Stream, a Codec, a codec-name str, or None. A Stream still copies the full input codec parameters, while a Codec or name only pins par_in->codec_id/codec_type, which is enough for codec-specific filters to initialize (filters that also need extradata still require a Stream). Also corrects the AVBSFContext.par_in/par_out declarations to non-const to match FFmpeg's header. --- CHANGELOG.rst | 1 + av/bitstream.py | 21 +++++++++++++++++---- av/bitstream.pyi | 3 ++- include/avcodec.pxd | 4 ++-- tests/test_bitstream.py | 16 ++++++++++++++++ 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3152c1f56..19ac4ed87 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -43,6 +43,7 @@ Features: - Add ``at`` parameter to ``Graph.push`` and ``Graph.vpush`` to push a frame to a single buffer source by index, for multi-input filters like ``overlay`` by :gh-user:`WyattBlue`. - ``find_best_pix_fmt_of_list`` now returns the loss as a ``PixFmtLoss`` ``enum.IntFlag`` instead of a plain ``int`` by :gh-user:`WyattBlue` (:issue:`2300`). - Add ``Colorspace.BT2020`` by :gh-user:`mark-oshea`. +- ``BitStreamFilterContext`` now accepts a ``Codec`` or a codec-name ``str`` as ``in_stream`` to pin the input codec without a full ``Stream`` by :gh-user:`WyattBlue`. Fixes: diff --git a/av/bitstream.py b/av/bitstream.py index db808ab35..822e710a5 100644 --- a/av/bitstream.py +++ b/av/bitstream.py @@ -1,5 +1,6 @@ import cython import cython.cimports.libav as lib +from cython.cimports.av.codec.codec import Codec from cython.cimports.av.error import err_check from cython.cimports.av.packet import Packet from cython.cimports.av.stream import Stream @@ -14,14 +15,18 @@ class BitStreamFilterContext: Wraps :ffmpeg:`AVBSFContext` - :param Stream in_stream: A stream that defines the input codec for the bitfilter. + :param in_stream: Defines the input codec for the bitfilter. A :class:`.Stream` + copies the full input codec parameters, while a :class:`.Codec` or a codec-name + ``str`` only pins the input codec, which is all a codec-specific filter (such as + ``h264_mp4toannexb``) needs to initialize. + :type in_stream: :class:`.Stream`, :class:`.Codec`, str, or None :param Stream out_stream: A stream whose codec is overwritten using the output parameters from the bitfilter. """ def __cinit__( self, filter_description, - in_stream: Stream | None = None, + in_stream: Stream | Codec | str | None = None, out_stream: Stream | None = None, ): res: cython.int @@ -31,12 +36,20 @@ def __cinit__( res = lib.av_bsf_list_parse_str(filter_str, cython.address(self.ptr)) err_check(res) - if in_stream is not None: + if isinstance(in_stream, Stream): with cython.nogil: res = lib.avcodec_parameters_copy( - self.ptr.par_in, in_stream.ptr.codecpar + self.ptr.par_in, cython.cast(Stream, in_stream).ptr.codecpar ) err_check(res) + elif in_stream is not None: + # A Codec or codec name only pins the input codec, which is enough for + # codec-specific filters (e.g. h264_mp4toannexb) to initialize. + codec: Codec = ( + in_stream if isinstance(in_stream, Codec) else Codec(in_stream) + ) + self.ptr.par_in.codec_id = codec.ptr.id + self.ptr.par_in.codec_type = codec.ptr.type with cython.nogil: res = lib.av_bsf_init(self.ptr) diff --git a/av/bitstream.pyi b/av/bitstream.pyi index 477c65f2d..34adc16bf 100644 --- a/av/bitstream.pyi +++ b/av/bitstream.pyi @@ -1,3 +1,4 @@ +from .codec import Codec from .packet import Packet from .stream import Stream @@ -5,7 +6,7 @@ class BitStreamFilterContext: def __init__( self, filter_description: str | bytes, - in_stream: Stream | None = None, + in_stream: Stream | Codec | str | None = None, out_stream: Stream | None = None, ): ... def filter(self, packet: Packet | None) -> list[Packet]: ... diff --git a/include/avcodec.pxd b/include/avcodec.pxd index c94deaaa9..02e9ac9ca 100644 --- a/include/avcodec.pxd +++ b/include/avcodec.pxd @@ -502,8 +502,8 @@ cdef extern from "libavcodec/bsf.h" nogil: cdef struct AVBSFContext: const AVBitStreamFilter *filter - const AVCodecParameters *par_in - const AVCodecParameters *par_out + AVCodecParameters *par_in + AVCodecParameters *par_out cdef int av_bsf_list_parse_str(const char *str, AVBSFContext **bsf) cdef int av_bsf_init(AVBSFContext *ctx) diff --git a/tests/test_bitstream.py b/tests/test_bitstream.py index f94ed73af..820704e25 100644 --- a/tests/test_bitstream.py +++ b/tests/test_bitstream.py @@ -72,6 +72,22 @@ def test_filter_h264_mp4toannexb() -> None: assert is_annexb(p) +def test_filter_in_stream_codec_and_name() -> None: + # A Codec or codec-name str can stand in for a Stream to pin the input codec, + # which is all a codec-specific filter needs to initialize. + # (Filters that also need the stream's extradata, such as h264_mp4toannexb, + # still require a full Stream to convert correctly.) + for in_stream in ("h264", av.Codec("h264", "r")): + ctx = BitStreamFilterContext("h264_mp4toannexb", in_stream) + assert isinstance(ctx, BitStreamFilterContext) + + +def test_filter_in_stream_wrong_codec() -> None: + # h264_mp4toannexb only supports h264, so a mismatched codec is rejected. + with pytest.raises(av.ArgumentError): + BitStreamFilterContext("h264_mp4toannexb", "hevc") + + def test_filter_output_parameters() -> None: with av.open(fate_suite("h264/interlaced_crop.mp4"), "r") as container: stream = container.streams.video[0]