Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile.openvino
Original file line number Diff line number Diff line change
Expand Up @@ -296,4 +296,4 @@ ENV GLOG_logtostderr=1
ENV LD_LIBRARY_PATH=/usr/local/lib:/opt/intel/openvino/runtime/lib/intel64/:/opt/intel/openvino/runtime/3rdparty/tbb/lib/
WORKDIR /mediapipe

## End of demos image #########################################################
## End of demos image #########################################################
3 changes: 3 additions & 0 deletions build_desktop_examples.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ for app in ${apps}; do
target="${app}:extract_yt8m_features"
echo "Skipping target ${target}"
continue
elif [[ "${target_name}" == "bytetrack" ]]; then
echo "Skipping target ${target_name} ."
continue
else
target="${app}:${target_name}_cpu"
fi
Expand Down
26 changes: 26 additions & 0 deletions mediapipe/calculators/openvino/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,32 @@ cc_library(
alwayslink = 1,
)

# Calculator library that converts YOLOX output ov::Tensors into MediaPipe
# Detection protos (implemented in
# openvino_yolox_tensors_to_detections_calculator.cc).
cc_library(
name = "openvino_yolox_tensors_to_detections_calculator",
srcs = ["openvino_yolox_tensors_to_detections_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
# C++ bindings for the calculator's options message.
":openvino_yolox_tensors_to_detections_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"//third_party:openvino",
],
# Keep the object file in the final binary even when no symbol from it is
# referenced directly: the calculator is made available only through its
# REGISTER_CALCULATOR static registration.
alwayslink = 1,
)

# Options proto for the YOLOX tensors-to-detections calculator. The
# ":openvino_yolox_tensors_to_detections_calculator_cc_proto" dependency of the
# calculator library is the C++ target derived from this rule.
mediapipe_proto_library(
name = "openvino_yolox_tensors_to_detections_calculator_proto",
srcs = ["openvino_yolox_tensors_to_detections_calculator.proto"],
visibility = ["//visibility:public"],
deps = [
# Needed because the options message extends mediapipe.CalculatorOptions.
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)

# To run this with native GPU on Linux, use:
# bazel test //mediapipe/calculators/tflite:tflite_inference_calculator_test --copt=-DTFLITE_GPU_EXTRA_GLES_DEPS --copt=-DMESA_EGL_NO_X11_HEADERS --copt=-DEGL_NO_X11 --config=grte_v5 --test_strategy=local
cc_test(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#include "mediapipe/calculators/openvino/openvino_yolox_tensors_to_detections_calculator.pb.h"

#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

#include <openvino/openvino.hpp>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/formats/location_data.pb.h"
#include "mediapipe/framework/port/ret_check.h"
#include "mediapipe/framework/port/status.h"

namespace mediapipe {

// Converts a YOLOX output ov::Tensor into MediaPipe Detections.
//
// Expected tensor (first element of TENSORS):
//   element type: f32
//   shape:        [1, 85, num_boxes]  (attribute-major: data[attr][box])
//   attributes:   [cx, cy, w, h, obj_score, class_0, ..., class_79]
//
// num_boxes must equal the number of cells of the three detection grids at
// strides 8, 16 and 32 of a square input_size x input_size image, e.g.
// 52*52 + 26*26 + 13*13 = 3549 for the default input_size of 416.
//
// Scores are consumed as-is (the model is expected to have applied the
// sigmoid already). Box coordinates are raw head outputs and are decoded
// here:
//   cx = (raw_cx + grid_x) * stride      cy = (raw_cy + grid_y) * stride
//   w  = exp(raw_w) * stride             h  = exp(raw_h) * stride
// The decoded box is then normalized by input_size and clamped to [0, 1].
// No non-maximum suppression is performed by this calculator.
//
// Input:
//   TENSORS: std::vector<ov::Tensor>
// Output:
//   DETECTIONS: std::vector<Detection>, each with a RELATIVE_BOUNDING_BOX
//               location, one score and one label_id (best class index).
//
// Options (OpenVINOYoloXTensorsToDetectionsCalculatorOptions):
//   conf_thresh: minimum obj_score * best_class_score for a box to be kept
//                (default 0.1).
//   input_size:  side length of the square model input in pixels
//                (default 416).

class OpenVINOYoloXTensorsToDetectionsCalculator : public CalculatorBase {
 public:
  // Declares the packet types. Both tags are mandatory because Process()
  // accesses them unconditionally.
  static absl::Status GetContract(CalculatorContract* cc) {
    RET_CHECK(cc->Inputs().HasTag("TENSORS"))
        << "Missing required input stream tag TENSORS";
    RET_CHECK(cc->Outputs().HasTag("DETECTIONS"))
        << "Missing required output stream tag DETECTIONS";
    cc->Inputs().Tag("TENSORS").Set<std::vector<ov::Tensor>>();
    cc->Outputs().Tag("DETECTIONS").Set<std::vector<Detection>>();
    return absl::OkStatus();
  }

  // Reads the options and precomputes the detection grids for input_size.
  // Fails if input_size is outside the supported range.
  absl::Status Open(CalculatorContext* cc) override {
    const auto& options =
        cc->Options<mediapipe::OpenVINOYoloXTensorsToDetectionsCalculatorOptions>();
    min_thresh_ = options.has_conf_thresh() ? options.conf_thresh() : 0.1f;
    input_size_ = options.has_input_size() ? options.input_size() : 416.0f;

    // Lower bound: the finest grid (stride 8) must contain at least one cell.
    // Upper bound: keeps the float -> int conversion and the flat tensor
    // index well inside their ranges. The comparison also rejects NaN.
    constexpr float kMinInputSize = 8.0f;
    constexpr float kMaxInputSize = 32768.0f;
    RET_CHECK(input_size_ >= kMinInputSize && input_size_ <= kMaxInputSize)
        << "input_size must be in [8, 32768], got " << input_size_;

    // Each stride contributes a (side / stride)^2 grid; boxes are laid out
    // in the tensor grid after grid, row-major within a grid.
    const int side = static_cast<int>(input_size_);
    constexpr int kStrides[] = {8, 16, 32};
    grids_.clear();
    num_boxes_ = 0;
    for (const int stride : kStrides) {
      const int cells = side / stride;
      grids_.push_back({stride, cells, cells});
      num_boxes_ += static_cast<size_t>(cells) * static_cast<size_t>(cells);
    }

    cc->SetOffset(TimestampDiff(0));
    return absl::OkStatus();
  }

  // Decodes the first tensor of the TENSORS packet and emits the boxes whose
  // combined score reaches conf_thresh. Emits nothing for an empty input
  // packet. Returns an error if the tensor type or shape is unexpected.
  absl::Status Process(CalculatorContext* cc) override {
    if (cc->Inputs().Tag("TENSORS").IsEmpty()) return absl::OkStatus();

    const auto& tensors =
        cc->Inputs().Tag("TENSORS").Get<std::vector<ov::Tensor>>();
    RET_CHECK(!tensors.empty());
    const ov::Tensor& raw = tensors[0];
    RET_CHECK(raw.get_element_type() == ov::element::f32);

    // Attribute-major layout: [1, num_attrs, num_boxes].
    const auto& shape = raw.get_shape();
    RET_CHECK_EQ(shape.size(), 3u);
    RET_CHECK_EQ(shape[0], 1u);
    RET_CHECK_EQ(shape[1], static_cast<size_t>(kNumAttrs));
    RET_CHECK_EQ(shape[2], num_boxes_)
        << "Tensor box count does not match the grids implied by input_size="
        << input_size_;

    const float* data = raw.data<float>();
    RET_CHECK(data != nullptr);

    // Accessor for the [attr, box] layout.
    const size_t num_boxes = num_boxes_;
    auto at = [data, num_boxes](int attr, size_t box) -> float {
      return data[static_cast<size_t>(attr) * num_boxes + box];
    };

    auto output_detections = absl::make_unique<std::vector<Detection>>();

    // box_idx advances in the loop header so that `continue` keeps it in
    // step with the (grid, gy, gx) position.
    size_t box_idx = 0;
    for (const GridInfo& g : grids_) {
      for (int gy = 0; gy < g.rows; ++gy) {
        for (int gx = 0; gx < g.cols; ++gx, ++box_idx) {
          const float obj = at(kObj, box_idx);

          // Arg-max over the class scores.
          int best_cls = 0;
          float best_cls_score = 0.0f;
          for (int c = 0; c < kNumClasses; ++c) {
            const float s = at(kFirstClass + c, box_idx);
            if (s > best_cls_score) {
              best_cls_score = s;
              best_cls = c;
            }
          }

          const float score = obj * best_cls_score;
          if (score < min_thresh_) continue;
          // Verbose-only: this runs per kept box on every frame.
          VLOG(2) << "CLASS: " << best_cls
                  << ", CLASS_SCORE: " << best_cls_score
                  << ", OBJECTNESS SCORE: " << obj
                  << ", FINAL SCORE: " << score;

          // cx/cy are offsets from the grid cell origin; w/h are log-scale
          // relative to the stride.
          const float cx = (at(kCx, box_idx) + gx) * g.stride;
          const float cy = (at(kCy, box_idx) + gy) * g.stride;
          const float w = std::exp(at(kW, box_idx)) * g.stride;
          const float h = std::exp(at(kH, box_idx)) * g.stride;

          // Normalize to [0, 1] and clamp to the image.
          const float x1 = std::max(0.0f, (cx - w * 0.5f) / input_size_);
          const float y1 = std::max(0.0f, (cy - h * 0.5f) / input_size_);
          const float x2 = std::min(1.0f, (cx + w * 0.5f) / input_size_);
          const float y2 = std::min(1.0f, (cy + h * 0.5f) / input_size_);

          // Drop boxes that are empty after clamping.
          if (x2 <= x1 || y2 <= y1) continue;

          // Build the detection in place to avoid copying the proto.
          output_detections->emplace_back();
          Detection& det = output_detections->back();
          auto* loc = det.mutable_location_data();
          loc->set_format(LocationData::RELATIVE_BOUNDING_BOX);
          auto* bbox = loc->mutable_relative_bounding_box();
          bbox->set_xmin(x1);
          bbox->set_ymin(y1);
          bbox->set_width(x2 - x1);
          bbox->set_height(y2 - y1);
          det.add_score(score);
          det.add_label_id(best_cls);
        }
      }
    }

    cc->Outputs().Tag("DETECTIONS")
        .Add(output_detections.release(), cc->InputTimestamp());
    return absl::OkStatus();
  }

 private:
  // One detection grid: `cols` x `rows` cells, each covering `stride` pixels.
  struct GridInfo {
    int stride;
    int cols;
    int rows;
  };

  // Row indices of the attribute axis and the fixed attribute counts.
  enum : int {
    kCx = 0,
    kCy = 1,
    kW = 2,
    kH = 3,
    kObj = 4,
    kFirstClass = 5,
    kNumClasses = 80,
    kNumAttrs = kFirstClass + kNumClasses,  // 85
  };

  std::vector<GridInfo> grids_;  // Built in Open() from input_size_.
  size_t num_boxes_ = 0;         // Total cells over all grids.
  float input_size_ = 416.0f;
  float min_thresh_ = 0.1f;
};

REGISTER_CALCULATOR(OpenVINOYoloXTensorsToDetectionsCalculator);

} // namespace mediapipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
syntax = 'proto2';

package mediapipe;

import "mediapipe/framework/calculator.proto";

// Options for OpenVINOYoloXTensorsToDetectionsCalculator.
message OpenVINOYoloXTensorsToDetectionsCalculatorOptions {
extend .mediapipe.CalculatorOptions {
optional OpenVINOYoloXTensorsToDetectionsCalculatorOptions ext = 211376657;
}

// Minimum combined score (objectness * best class score) a box must reach to
// be emitted as a detection.
optional float conf_thresh = 1 [default = 0.10];
// Side length, in pixels, of the model input. Decoded box coordinates are
// divided by this value to normalize them to [0, 1].
optional float input_size = 3 [default = 416.0];

}
1 change: 1 addition & 0 deletions mediapipe/calculators/ovms/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ cc_library(
"//mediapipe/calculators/openvino:openvino_tensors_to_detections_calculator_cc_proto",
"//mediapipe/calculators/openvino:openvino_converter_calculator_cc_proto",
"//mediapipe/calculators/openvino:openvino_converter_calculator",
"//mediapipe/calculators/openvino:openvino_yolox_tensors_to_detections_calculator",
"//mediapipe/calculators/openvino:openvino_tensors_to_classification_calculator",
"//mediapipe/calculators/openvino:openvino_tensors_to_detections_calculator",
":modelapiovmsadapter",
Expand Down
6 changes: 6 additions & 0 deletions mediapipe/calculators/ovms/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
"name":"ssdlite_object_detection",
"base_path":"/mediapipe/mediapipe/models/ovms/ssdlite_object_detection"
}
},
{
"config":{
"name":"yoloxt_float32",
"base_path":"/mediapipe/mediapipe/models/ovms/yoloxt_float32"
}
}
]
}
25 changes: 25 additions & 0 deletions mediapipe/calculators/tflite/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,31 @@ mediapipe_proto_library(
],
)

# Calculator library built from yolox_tensors_to_detections_calculator.cc.
cc_library(
name = "yolox_tensors_to_detections_calculator",
srcs = ["yolox_tensors_to_detections_calculator.cc"],
visibility = ["//visibility:public"],
deps = [
# C++ bindings for the calculator's options message.
":yolox_tensors_to_detections_calculator_cc_proto",
"//mediapipe/framework:calculator_framework",
"//mediapipe/framework/formats:detection_cc_proto",
"//mediapipe/framework/formats:location_data_cc_proto",
"//mediapipe/framework/port:ret_check",
"//mediapipe/framework/port:status",
"@org_tensorflow//tensorflow/lite:framework",
],
# Keeps the object file linked even when no symbol from it is referenced;
# presumably required because the calculator registers itself statically --
# confirm against the .cc file.
alwayslink = 1,
)

# Options proto for the YOLOX tensors-to-detections calculator; the
# ":yolox_tensors_to_detections_calculator_cc_proto" dependency of the
# calculator library is the C++ target derived from this rule.
# NOTE(review): no explicit `visibility` is set here, so the package default
# applies -- confirm that targets outside this package can still depend on
# the generated proto targets if they need to.
mediapipe_proto_library(
name = "yolox_tensors_to_detections_calculator_proto",
srcs = ["yolox_tensors_to_detections_calculator.proto"],
deps = [
"//mediapipe/framework:calculator_options_proto",
"//mediapipe/framework:calculator_proto",
],
)

mediapipe_proto_library(
name = "tflite_tensors_to_landmarks_calculator_proto",
srcs = ["tflite_tensors_to_landmarks_calculator.proto"],
Expand Down
Loading