diff --git a/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc b/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc
index c8dd0e2a0..246269de1 100644
--- a/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc
+++ b/mediapipe/calculators/tensor/tensors_to_detections_calculator.cc
@@ -256,6 +256,7 @@ class TensorsToDetectionsCalculator : public Node {
   bool gpu_inited_ = false;
   bool gpu_input_ = false;
+  bool gpu_has_enough_work_groups_ = true;
   bool anchors_init_ = false;
 };
 MEDIAPIPE_REGISTER_NODE(TensorsToDetectionsCalculator);
@@ -291,7 +292,7 @@ absl::Status TensorsToDetectionsCalculator::Open(CalculatorContext* cc) {
 absl::Status TensorsToDetectionsCalculator::Process(CalculatorContext* cc) {
   auto output_detections = absl::make_unique<std::vector<Detection>>();
   bool gpu_processing = false;
-  if (CanUseGpu()) {
+  if (CanUseGpu() && gpu_has_enough_work_groups_) {
     // Use GPU processing only if at least one input tensor is already on GPU
     // (to avoid CPU->GPU overhead).
     for (const auto& tensor : *kInTensors(cc)) {
@@ -321,11 +322,20 @@ absl::Status TensorsToDetectionsCalculator::Process(CalculatorContext* cc) {
     RET_CHECK(!has_custom_box_indices_);
   }
 
-  if (gpu_processing) {
-    if (!gpu_inited_) {
-      MP_RETURN_IF_ERROR(GpuInit(cc));
+  if (gpu_processing && !gpu_inited_) {
+    auto status = GpuInit(cc);
+    if (status.ok()) {
       gpu_inited_ = true;
+    } else if (status.code() == absl::StatusCode::kFailedPrecondition) {
+      // For an initialization error due to a hardware limitation, fall back
+      // to CPU processing.
+      LOG(WARNING) << status.message();
+    } else {
+      // For other errors, let the error propagate.
+      return status;
     }
+  }
+  if (gpu_processing && gpu_inited_) {
     MP_RETURN_IF_ERROR(ProcessGPU(cc, output_detections.get()));
   } else {
     MP_RETURN_IF_ERROR(ProcessCPU(cc, output_detections.get()));
@@ -346,17 +356,41 @@ absl::Status TensorsToDetectionsCalculator::ProcessCPU(
   // TODO: Add flexible input tensor size handling.
   auto raw_box_tensor =
       &input_tensors[tensor_mapping_.detections_tensor_index()];
-  RET_CHECK_EQ(raw_box_tensor->shape().dims.size(), 3);
-  RET_CHECK_EQ(raw_box_tensor->shape().dims[0], 1);
   RET_CHECK_GT(num_boxes_, 0) << "Please set num_boxes in calculator options";
-  RET_CHECK_EQ(raw_box_tensor->shape().dims[1], num_boxes_);
-  RET_CHECK_EQ(raw_box_tensor->shape().dims[2], num_coords_);
+  if (raw_box_tensor->shape().dims.size() == 3) {
+    // The tensors from CPU inference have dim 3.
+    RET_CHECK_EQ(raw_box_tensor->shape().dims[0], 1);
+    RET_CHECK_EQ(raw_box_tensor->shape().dims[1], num_boxes_);
+    RET_CHECK_EQ(raw_box_tensor->shape().dims[2], num_coords_);
+  } else if (raw_box_tensor->shape().dims.size() == 4) {
+    // The tensors from GPU inference have dim 4. For GPU-CPU fallback
+    // support, we allow tensors with 4 dims.
+    RET_CHECK_EQ(raw_box_tensor->shape().dims[0], 1);
+    RET_CHECK_EQ(raw_box_tensor->shape().dims[1], 1);
+    RET_CHECK_EQ(raw_box_tensor->shape().dims[2], num_boxes_);
+    RET_CHECK_EQ(raw_box_tensor->shape().dims[3], num_coords_);
+  } else {
+    return absl::InvalidArgumentError(
+        "The dimensions of box Tensor must be 3 or 4.");
+  }
   auto raw_score_tensor =
       &input_tensors[tensor_mapping_.scores_tensor_index()];
-  RET_CHECK_EQ(raw_score_tensor->shape().dims.size(), 3);
-  RET_CHECK_EQ(raw_score_tensor->shape().dims[0], 1);
-  RET_CHECK_EQ(raw_score_tensor->shape().dims[1], num_boxes_);
-  RET_CHECK_EQ(raw_score_tensor->shape().dims[2], num_classes_);
+  if (raw_score_tensor->shape().dims.size() == 3) {
+    // The tensors from CPU inference have dim 3.
+    RET_CHECK_EQ(raw_score_tensor->shape().dims[0], 1);
+    RET_CHECK_EQ(raw_score_tensor->shape().dims[1], num_boxes_);
+    RET_CHECK_EQ(raw_score_tensor->shape().dims[2], num_classes_);
+  } else if (raw_score_tensor->shape().dims.size() == 4) {
+    // The tensors from GPU inference have dim 4. For GPU-CPU fallback
+    // support, we allow tensors with 4 dims.
+    RET_CHECK_EQ(raw_score_tensor->shape().dims[0], 1);
+    RET_CHECK_EQ(raw_score_tensor->shape().dims[1], 1);
+    RET_CHECK_EQ(raw_score_tensor->shape().dims[2], num_boxes_);
+    RET_CHECK_EQ(raw_score_tensor->shape().dims[3], num_classes_);
+  } else {
+    return absl::InvalidArgumentError(
+        "The dimensions of score Tensor must be 3 or 4.");
+  }
   auto raw_box_view = raw_box_tensor->GetCpuReadView();
   auto raw_boxes = raw_box_view.buffer();
   auto raw_scores_view = raw_score_tensor->GetCpuReadView();
@@ -1111,8 +1145,13 @@ void main() {
   int max_wg_size;  // typically <= 1024
   glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &max_wg_size);  // y-dim
-  CHECK_LT(num_classes_, max_wg_size)
-      << "# classes must be < " << max_wg_size;
+  gpu_has_enough_work_groups_ = num_classes_ < max_wg_size;
+  if (!gpu_has_enough_work_groups_) {
+    return absl::FailedPreconditionError(absl::StrFormat(
+        "Hardware limitation: Processing will be done on CPU, because "
+        "num_classes %d exceeds the max work_group size %d.",
+        num_classes_, max_wg_size));
+  }
   // TODO support better filtering.
   if (class_index_set_.is_allowlist) {
     CHECK_EQ(class_index_set_.values.size(),
@@ -1370,7 +1409,13 @@ kernel void scoreKernel(
         Tensor::ElementType::kFloat32, Tensor::Shape{1, num_boxes_ * 2});
     // # filter classes supported is hardware dependent.
     int max_wg_size = score_program_.maxTotalThreadsPerThreadgroup;
-    CHECK_LT(num_classes_, max_wg_size) << "# classes must be <" << max_wg_size;
+    gpu_has_enough_work_groups_ = num_classes_ < max_wg_size;
+    if (!gpu_has_enough_work_groups_) {
+      return absl::FailedPreconditionError(absl::StrFormat(
+          "Hardware limitation: Processing will be done on CPU, because "
+          "num_classes %d exceeds the max work_group size %d.",
+          num_classes_, max_wg_size));
+    }
   }
 #endif  // !defined(MEDIAPIPE_DISABLE_GL_COMPUTE)
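
For reference, the fallback pattern the diff introduces can be summarized outside of MediaPipe as a small standalone sketch. The names below (InitGpu, RunOnGpu, RunOnCpu, kMaxWorkGroupSize) are hypothetical stand-ins for the calculator's GpuInit/ProcessGPU/ProcessCPU and the queried GL/Metal work-group limits; the sketch only requires Abseil and illustrates treating kFailedPrecondition as "fall back to CPU" while propagating every other error, under the assumption that the flag is set in the caller rather than inside GpuInit as in the real change.

#include <iostream>

#include "absl/status/status.h"
#include "absl/strings/str_format.h"

namespace {

constexpr int kMaxWorkGroupSize = 1024;  // Hypothetical hardware limit.

// Stand-in for GpuInit(): fails with kFailedPrecondition when num_classes
// exceeds what a single GPU work group could handle.
absl::Status InitGpu(int num_classes) {
  if (num_classes >= kMaxWorkGroupSize) {
    return absl::FailedPreconditionError(absl::StrFormat(
        "Hardware limitation: num_classes %d exceeds the max work_group "
        "size %d.",
        num_classes, kMaxWorkGroupSize));
  }
  return absl::OkStatus();
}

absl::Status RunOnGpu() { return absl::OkStatus(); }  // Stand-in for ProcessGPU().
absl::Status RunOnCpu() { return absl::OkStatus(); }  // Stand-in for ProcessCPU().

// Mirrors the Process() logic in the diff: try GPU init once, remember the
// outcome, and fall back to CPU only for the hardware-limitation case.
absl::Status ProcessOnce(int num_classes, bool& gpu_inited,
                         bool& gpu_has_enough_work_groups) {
  if (gpu_has_enough_work_groups && !gpu_inited) {
    absl::Status status = InitGpu(num_classes);
    if (status.ok()) {
      gpu_inited = true;
    } else if (status.code() == absl::StatusCode::kFailedPrecondition) {
      // Subsequent calls skip the GPU path entirely.
      gpu_has_enough_work_groups = false;
      std::cerr << "WARNING: " << status.message() << "\n";
    } else {
      return status;  // Any other error is propagated unchanged.
    }
  }
  return gpu_inited ? RunOnGpu() : RunOnCpu();
}

}  // namespace

int main() {
  bool gpu_inited = false;
  bool gpu_has_enough_work_groups = true;
  // 2048 classes trips the (hypothetical) limit, so processing stays on CPU.
  absl::Status s = ProcessOnce(/*num_classes=*/2048, gpu_inited,
                               gpu_has_enough_work_groups);
  if (!s.ok()) std::cerr << s.message() << "\n";
  return s.ok() ? 0 : 1;
}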