diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/experimental_detectron_detection_output_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/experimental_detectron_detection_output_ref.cl
index 58dfdda19e7..bcf2e71c421 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/experimental_detectron_detection_output_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/experimental_detectron_detection_output_ref.cl
@@ -286,8 +286,13 @@ KERNEL(eddo_ref_stage_1)
  __global ScoreClassIndex* score_class_index_map,
  __global uint* detection_count) {
     size_t total_detections_num = 0;
+
     // FIXME: figure out how to parallelize this!!!
+#ifdef CLASS_AGNOSTIC_BOX_REGRESSION
+    for (int class_idx = 1; class_idx < NUM_CLASSES; ++class_idx) {
+#else
     for (int class_idx = 0; class_idx < NUM_CLASSES; ++class_idx) {
+#endif
         FUNC_CALL(nms_cf)
         (&refined_scores[ROI_COUNT * class_idx],
          &refined_boxes[ROI_COUNT * 4 * class_idx],
diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/experimental_detectron_roi_feature_extractor_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/experimental_detectron_roi_feature_extractor_ref.cl
index d2353319a45..a1b6ebdf54d 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/experimental_detectron_roi_feature_extractor_ref.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/experimental_detectron_roi_feature_extractor_ref.cl
@@ -5,6 +5,12 @@
 #include "include/batch_headers/common.cl"
 #include "include/batch_headers/data_types.cl"
 
+inline int FUNC(get_pyramid_level_index)(uint level, uint c, uint y, uint x) {  // Index of element (c, y, x) in the pyramid input selected by `level`.
+    uint idx = 0;  // Remains 0 if none of the generated branches matches `level`.
+    LEVELS_IDX_CALC_FUNCS;  // JIT macro built by GetIndexCalculationFuncs: if/else-if chain on `level` assigning `idx` via INPUT<n>_GET_INDEX(0, c, y, x).
+    return idx;  // NOTE(review): `idx` is uint but the function returns int, so it is implicitly converted here.
+}
+
 inline int FUNC(get_pyramid_level_for_roi)(const __global INPUT0_TYPE* current_roi) {
     const INPUT0_TYPE canonical_scale = 224.0;
     const int canonical_level = 2;
@@ -63,8 +69,6 @@ KERNEL(experimental_detectron_roi_feature_extractor_ref)(const __global INPUT0_T
     const uint level_offset = LEVEL_SIZES[3 * level + 2];
 
     INPUT0_TYPE output_val = 0.0;
-    const __global INPUT1_TYPE* data = current_level_ptr + level_offset + c * level_h * level_w;
-
     INPUT0_TYPE current_bin_start_h = roi_start_h + y * bin_height;
     INPUT0_TYPE current_bin_start_w = roi_start_w + x * bin_width;
     for (int iy = 0; iy < roi_bin_grid_h; iy++) {
@@ -114,10 +118,10 @@ KERNEL(experimental_detectron_roi_feature_extractor_ref)(const __global INPUT0_T
             INPUT0_TYPE w3 = ly * hx;
             INPUT0_TYPE w4 = ly * lx;
 
-            output_val += w1 * data[y_low * level_w + x_low] +
-                          w2 * data[y_low * level_w + x_high] +
-                          w3 * data[y_high * level_w + x_low] +
-                          w4 * data[y_high * level_w + x_high];
+            output_val += w1 * current_level_ptr[FUNC_CALL(get_pyramid_level_index)(level, c, y_low, x_low)] +
+                          w2 * current_level_ptr[FUNC_CALL(get_pyramid_level_index)(level, c, y_low, x_high)] +
+                          w3 * current_level_ptr[FUNC_CALL(get_pyramid_level_index)(level, c, y_high, x_low)] +
+                          w4 * current_level_ptr[FUNC_CALL(get_pyramid_level_index)(level, c, y_high, x_high)];
         }
     }
     output_val /= TO_INPUT0_TYPE(roi_bin_grid_h * roi_bin_grid_w);
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/ed_rfe/roi_feature_extractor_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/ed_rfe/roi_feature_extractor_kernel_ref.cpp
index f6b695a0b7f..afaf44e64df 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/ed_rfe/roi_feature_extractor_kernel_ref.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/ed_rfe/roi_feature_extractor_kernel_ref.cpp
@@ -65,6 +65,16 @@ namespace {
         result += "}";
         return result;
     }
+
+    std::string GetIndexCalculationFuncs(size_t levels_num) {  // Builds the OpenCL if/else-if chain (emitted as LEVELS_IDX_CALC_FUNCS) that sets `idx` from `level`, `c`, `y`, `x`.
+        std::string result = "if (level == 0) { idx = INPUT1_GET_INDEX(0, c, y, x); }";  // Level-0 branch is emitted unconditionally, even when levels_num is 0.
+        std::string idx = "";  // Numeric suffix of the INPUT<n> macro for the level being generated.
+        for (size_t i = 1; i < levels_num; i++) {
+            idx = std::to_string(i + 1);  // Level i maps to INPUT<i + 1>, continuing from INPUT1 used for level 0.
+            result += " else if (level == " + std::to_string(i) + ") { idx = INPUT" + idx + "_GET_INDEX(0, c, y, x); }";
+        }
+        return result;  // No trailing else: a `level` outside [0, levels_num) leaves the kernel-side `idx` at its prior value.
+    }
 }  // namespace
 
 JitConstants ExperimentalDetectronROIFeatureExtractorRef::GetJitConstants(const experimental_detectron_roi_feature_extractor_params& params) const {
@@ -79,7 +89,8 @@ JitConstants ExperimentalDetectronROIFeatureExtractorRef::GetJitConstants(const
                       MakeJitConstant("INPUT_LEVEL_PARAMS", GetInputLevelParams(levels_num)),
                       MakeJitConstant("LEVEL_PTRS", GetDefinedLevelPtrs(levels_num)),
                       MakeJitConstant("SPATIAL_SCALES", GetDefinedSpatialScales(params.pyramid_scales, levels_num)),
-                      MakeJitConstant("LEVEL_SIZES", GetDefinedLevelSizes(levels_num))});
+                      MakeJitConstant("LEVEL_SIZES", GetDefinedLevelSizes(levels_num)),
+                      MakeJitConstant("LEVELS_IDX_CALC_FUNCS", GetIndexCalculationFuncs(levels_num))});
 
     return jit;
 }
diff --git a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp
index 9a7b027c619..3881b773c02 100644
--- a/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp
+++ b/src/plugins/intel_gpu/tests/test_cases/experimental_detectron_roi_feature_extractor_gpu_test.cpp
@@ -122,11 +122,18 @@ TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, two_levels) {
     const std::vector<int64_t> pyramid_scales = {4, 224};
     const int sampling_ratio = 2;
     const bool aligned = false;
+    auto level_1_layout = layout{data_types::f32, format::bfyx, {1, 2, 3, 2}};
+    auto level_2_layout = layout{data_types::f32, format::bfyx, {1, 2, 3, 2}};
     auto roi_input = engine.allocate_memory({data_types::f32, format::bfyx, tensor(batch(rois_num), feature(rois_feature_dim))});
-    auto level_1 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 2, 3, 2}});
-    auto level_2 = engine.allocate_memory({data_types::f32, format::bfyx, {1, 2, 3, 2}});
+    auto level_1 = engine.allocate_memory(level_1_layout);
+    auto level_2 = engine.allocate_memory(level_2_layout);
     auto second_output = engine.allocate_memory({ data_types::f32, format::bfyx, tensor(batch(rois_num), feature(rois_feature_dim))});
 
+    auto level_1_padded_layout = level_1_layout;
+    level_1_padded_layout.data_padding = padding({0, 0, 1, 1}, {0, 0, 1, 1});
+    auto level_2_padded_layout = level_2_layout;
+    level_2_padded_layout.data_padding = padding({0, 1, 1, 1}, {0, 1, 1, 1});
+
     std::vector<float> rois {0.0f, 56.0f, 112.0f, 168.0f, 4.0f, 5.0f, 6.0f, 7.0f};
     set_values(roi_input, rois);
     set_values(level_1, {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f});
@@ -135,6 +142,8 @@ TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, two_levels) {
     const std::string input_rois_id = "InputRois";
     const std::string input_level_1_id = "InputLevel1";
     const std::string input_level_2_id = "InputLevel2";
+    const std::string input_level_1_pad_id = "InputLevel1_padding";
+    const std::string input_level_2_pad_id = "InputLevel2_padding";
     const std::string second_output_w_id = "second_output_w";
     const std::string second_output_r_id = "second_output_r";
     const std::string feature_extractor_id = "experimental_detectron_roi_feature_extractor";
@@ -143,9 +152,14 @@ TEST(experimental_detectron_roi_feature_extractor_gpu_fp32, two_levels) {
     topology.add(input_layout(input_rois_id, roi_input->get_layout()));
     topology.add(input_layout(input_level_1_id, level_1->get_layout()));
     topology.add(input_layout(input_level_2_id, level_2->get_layout()));
+    topology.add(reorder(input_level_1_pad_id, input_info(input_level_1_id), level_1_padded_layout));
+    topology.add(reorder(input_level_2_pad_id, input_info(input_level_2_id), level_2_padded_layout));
     topology.add(mutable_data(second_output_w_id, second_output));
     topology.add(experimental_detectron_roi_feature_extractor(feature_extractor_id,
-                                                              { input_info(input_rois_id), input_info(input_level_1_id), input_info(input_level_2_id), input_info(second_output_w_id) },
+                                                              { input_info(input_rois_id),
+                                                                input_info(input_level_1_pad_id),
+                                                                input_info(input_level_2_pad_id),
+                                                                input_info(second_output_w_id) },
                                                               output_dim,
                                                               pyramid_scales,
                                                               sampling_ratio,