Skip to content

Commit 3a8d28f

Browse files
vraspar and Copilot committed
Add tensor size validation for MatMulBnb4 to prevent OOB read via K/N attribute mismatch
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent aaa4944 commit 3a8d28f

2 files changed

Lines changed: 59 additions & 0 deletions

File tree

onnxruntime/contrib_ops/cpu/quantization/matmul_bnb4.cc

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ class MatMulBnb4 final : public OpKernel {
1919
ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("N", &N_));
2020
ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("block_size", &block_size_));
2121
ORT_ENFORCE(Status::OK() == info.GetAttr<int64_t>("quant_type", &quant_type_));
22+
ORT_ENFORCE(K_ > 0, "K must be positive, got ", K_);
23+
ORT_ENFORCE(N_ > 0, "N must be positive, got ", N_);
24+
ORT_ENFORCE(block_size_ > 0, "block_size must be positive, got ", block_size_);
2225
ORT_ENFORCE(
2326
quant_type_ == FP4 || quant_type_ == NF4,
2427
"Invalid quant_type, only 0 (FP4) and 1 (NF4) are supported.");
@@ -50,6 +53,24 @@ Status MatMulBnb4::Compute(OpKernelContext* ctx) const {
5053
const uint8_t* b_quant_data = b_quant->Data<uint8_t>();
5154
const float* absmax_data = absmax->Data<float>();
5255

56+
const int64_t numel = K_ * N_;
57+
const int64_t expected_b_quant_size = (numel + 1) / 2;
58+
const int64_t expected_absmax_size = (numel + block_size_ - 1) / block_size_;
59+
60+
if (b_quant->Shape().Size() < expected_b_quant_size) {
61+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
62+
"b_quant tensor size (", b_quant->Shape().Size(),
63+
") is too small for K=", K_, " and N=", N_,
64+
". Expected at least ", expected_b_quant_size, " elements.");
65+
}
66+
if (absmax->Shape().Size() < expected_absmax_size) {
67+
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
68+
"absmax tensor size (", absmax->Shape().Size(),
69+
") is too small for K=", K_, ", N=", N_,
70+
", block_size=", block_size_,
71+
". Expected at least ", expected_absmax_size, " elements.");
72+
}
73+
5374
AllocatorPtr allocator;
5475
auto status = ctx->GetTempSpaceAllocator(&allocator);
5576
ORT_RETURN_IF_ERROR(status);

onnxruntime/test/contrib_ops/matmul_bnb4_test.cc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,44 @@ void RunTest(int64_t quant_type, int64_t M, int64_t N, int64_t K, int64_t block_
115115
}
116116
}
117117

118+
// Verifies the kernel rejects a packed 4-bit weight tensor ("B") that is
// smaller than the size implied by the K/N attributes.
// With K=32 and N=2 the weight has numel = 64, so B must carry at least
// (64 + 1) / 2 = 32 packed bytes; we hand it only 4 bytes (which would be
// valid for K=4, N=2) and expect Compute to fail with INVALID_ARGUMENT.
TEST(MatMulBnb4, RejectsUndersizedBQuantTensor) {
  constexpr int64_t kK = 32;
  constexpr int64_t kN = 2;

  OpTester tester("MatMulBnb4", 1, kMSDomain);
  tester.AddAttribute<int64_t>("K", kK);
  tester.AddAttribute<int64_t>("N", kN);
  tester.AddAttribute<int64_t>("block_size", 32LL);
  tester.AddAttribute<int64_t>("quant_type", 1LL);  // NF4

  tester.AddInput<float>("A", {1, kK}, std::vector<float>(kK, 0.0f));
  // Deliberately undersized: 4 bytes instead of the 32 the attributes imply.
  tester.AddInput<uint8_t>("B", {4}, std::vector<uint8_t>(4, 0));
  tester.AddInput<float>("absmax", {2}, std::vector<float>(2, 1.0f));
  tester.AddOutput<float>("Y", {1, kN}, std::vector<float>(kN, 0.0f));

  tester.Run(OpTester::ExpectResult::kExpectFailure, "b_quant tensor size");
}
134+
135+
// Verifies the kernel rejects an "absmax" scale tensor that is smaller than
// the size implied by the K/N/block_size attributes.
// With K=32, N=2, block_size=32 the weight has numel = 64, so the kernel
// expects ceil(64 / 32) = 2 absmax entries; we supply a single element and
// expect Compute to fail with INVALID_ARGUMENT.
TEST(MatMulBnb4, RejectsUndersizedAbsmaxTensor) {
  constexpr int64_t kK = 32;
  constexpr int64_t kN = 2;
  constexpr int64_t kBlockSize = 32;
  constexpr int64_t kNumel = kK * kN;
  // Bytes in the 4-bit-packed weight tensor (two values per byte).
  constexpr int64_t kPackedBytes = (kNumel + 1) / 2;

  OpTester tester("MatMulBnb4", 1, kMSDomain);
  tester.AddAttribute<int64_t>("K", kK);
  tester.AddAttribute<int64_t>("N", kN);
  tester.AddAttribute<int64_t>("block_size", kBlockSize);
  tester.AddAttribute<int64_t>("quant_type", 1LL);  // NF4

  tester.AddInput<float>("A", {1, kK}, std::vector<float>(kK, 0.0f));
  tester.AddInput<uint8_t>("B", {kPackedBytes}, std::vector<uint8_t>(kPackedBytes, 0));
  // Deliberately undersized: one element instead of the two required.
  tester.AddInput<float>("absmax", {1}, std::vector<float>(1, 1.0f));
  tester.AddOutput<float>("Y", {1, kN}, std::vector<float>(kN, 0.0f));

  tester.Run(OpTester::ExpectResult::kExpectFailure, "absmax tensor size");
}
155+
118156
TEST(MatMulBnb4, DISABLED_Float32) {
119157
for (auto qt : {0, 1}) {
120158
for (auto M : {1, 2, 100}) {

0 commit comments

Comments (0)