From b90c6a7dba2bf386024167e594487e3614a15bd5 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 27 May 2026 16:58:52 -0400 Subject: [PATCH 1/3] Call align_buffers() in from_ffi(), remove all from pyarrow FFI code. --- arrow-array/src/ffi.rs | 46 ++++++++++++++++++++++++++++++++++++++-- arrow-pyarrow/src/lib.rs | 7 +----- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index f50dd3420baa..043be1b70725 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -281,7 +281,13 @@ pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Resul data_type: dt, owner: &array, }; - tmp.consume() + let mut data = tmp.consume()?; + // arrow-rs has stricter alignment requirements than the C Data Interface spec; + // a no-op when buffers are already aligned. + // See https://github.com/apache/arrow/issues/43552 and + // https://github.com/apache/arrow-rs/issues/10028 for context. + data.align_buffers(); + Ok(data) } /// Import [ArrayData] from the C Data Interface @@ -299,7 +305,9 @@ pub unsafe fn from_ffi_and_data_type( data_type, owner: &array, }; - tmp.consume() + let mut data = tmp.consume()?; + data.align_buffers(); + Ok(data) } #[derive(Debug)] @@ -667,6 +675,40 @@ mod tests_to_then_from_ffi { } // case with nulls is tested in the docs, through the example on this module. + #[test] + #[cfg(not(feature = "force_validate"))] + fn test_decimal128_under_aligned_round_trip() -> Result<()> { + // Construct an 8-aligned-but-not-16-aligned i128 data buffer to model + // an FFI producer that only guarantees the C Data Interface's + // recommended 8-byte alignment (e.g. arrow-java). + let aligned = Buffer::from_vec(vec![0_i128, 1_i128, 2_i128]); + let under_aligned = aligned.slice(8); + assert_eq!(under_aligned.as_ptr().align_offset(8), 0); + assert_ne!(under_aligned.as_ptr().align_offset(16), 0); + + // SAFETY: buffer is large enough for 2 i128 elements; misaligned + // input is the condition under test. + let data = unsafe { + ArrayData::builder(DataType::Decimal128(10, 2)) + .len(2) + .add_buffer(under_aligned) + .build_unchecked() + }; + + let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap(); + let array = FFI_ArrowArray::new(&data); + + let imported = unsafe { from_ffi(array, &schema) }?; + let array = Decimal128Array::from(imported); + + // The little-endian byte layout of [0i128, 1, 2] sliced 8 bytes in + // yields elements `1 << 64` and `2 << 64`. + assert_eq!(array.len(), 2); + assert_eq!(array.value(0), 1_i128 << 64); + assert_eq!(array.value(1), 2_i128 << 64); + Ok(()) + } + #[test] fn test_null_count_handling() { let int32_data = ArrayData::builder(DataType::Int32) diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs index d8f584e396d3..484324665cac 100644 --- a/arrow-pyarrow/src/lib.rs +++ b/arrow-pyarrow/src/lib.rs @@ -353,7 +353,7 @@ impl FromPyArrow for RecordBatch { .pointer_checked(Some(ARROW_ARRAY_CAPSULE_NAME))? .cast::(); let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_ptr.as_ptr()) }; - let mut array_data = + let array_data = unsafe { ffi::from_ffi(ffi_array, schema_ptr.as_ref()) }.map_err(to_py_err)?; if !matches!(array_data.data_type(), DataType::Struct(_)) { return Err(PyTypeError::new_err( @@ -361,11 +361,6 @@ impl FromPyArrow for RecordBatch { )); } let options = RecordBatchOptions::default().with_row_count(Some(array_data.len())); - // Ensure data is aligned (by potentially copying the buffers). - // This is needed because some python code (for example the - // python flight client) produces unaligned buffers - // See https://github.com/apache/arrow/issues/43552 for details - array_data.align_buffers(); let array = StructArray::from(array_data); // StructArray does not embed metadata from schema. We need to override // the output schema with the schema from the capsule. From 3bb5550e435164b41ddca1c5d54ca3c5f3780bb2 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 28 May 2026 09:56:29 -0400 Subject: [PATCH 2/3] Address PR feedback. --- arrow-array/src/ffi.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 043be1b70725..4daaac17e891 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -306,6 +306,10 @@ pub unsafe fn from_ffi_and_data_type( owner: &array, }; let mut data = tmp.consume()?; + // arrow-rs has stricter alignment requirements than the C Data Interface spec; + // a no-op when buffers are already aligned. + // See https://github.com/apache/arrow/issues/43552 and + // https://github.com/apache/arrow-rs/issues/10028 for context. data.align_buffers(); Ok(data) } From 86b9584f92cbf7279dfca5435e0c910318fa016a Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 28 May 2026 13:23:23 -0400 Subject: [PATCH 3/3] Address PR feedback. --- arrow-array/src/ffi.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 4daaac17e891..59b9f6b3b7b5 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -283,7 +283,8 @@ pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Resul }; let mut data = tmp.consume()?; // arrow-rs has stricter alignment requirements than the C Data Interface spec; - // a no-op when buffers are already aligned. + // a no-op when buffers are already aligned. Unreachable under + // `cfg(feature = "force_validate")`; tracked in #10034. // See https://github.com/apache/arrow/issues/43552 and // https://github.com/apache/arrow-rs/issues/10028 for context. data.align_buffers(); @@ -307,7 +308,8 @@ pub unsafe fn from_ffi_and_data_type( }; let mut data = tmp.consume()?; // arrow-rs has stricter alignment requirements than the C Data Interface spec; - // a no-op when buffers are already aligned. + // a no-op when buffers are already aligned. Unreachable under + // `cfg(feature = "force_validate")`; tracked in #10034. // See https://github.com/apache/arrow/issues/43552 and // https://github.com/apache/arrow-rs/issues/10028 for context. data.align_buffers();