diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gen9_common_conv_fwd_data_f32.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gen9_common_conv_fwd_data_f32.cl
index 671cd55ef56..35110232fc1 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gen9_common_conv_fwd_data_f32.cl
+++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/gen9_common_conv_fwd_data_f32.cl
@@ -476,10 +476,16 @@ const float sum_scale = 1;
 #if OW % OW_BLOCK != 0
         if (ow + OW_BLOCK > OW) {
             for (int i = 0; i < OW - OW_LAST; i++) {
+#if HAS_FUSED_OPS
+                { FUSED_OPS_SCALAR0; blockC00[i] = FUSED_OPS_RESULT_SCALAR0; }
+#endif
                 _sub_group_block_write((__global unsigned int *)(&dst_write0[i
                                                * OC_BLOCK * MB_BLOCK]),
                         as_uint(blockC00[i]));
 #if OCB == 32
+#if HAS_FUSED_OPS
+                { FUSED_OPS_SCALAR1; blockC01[i] = FUSED_OPS_RESULT_SCALAR1; }
+#endif
                 _sub_group_block_write(
                         (__global unsigned int *)(&dst_write0[i * OC_BLOCK
                                 * MB_BLOCK
@@ -492,10 +498,16 @@ const float sum_scale = 1;
 #if OW_BLOCK != 8 || MB_BLOCK != 1
         __attribute__((opencl_unroll_hint(OW_BLOCK))) // attr:no-format
         for (int i = 0; i < OW_BLOCK; i++) {
+#if HAS_FUSED_OPS
+            { FUSED_OPS_SCALAR0; blockC00[i] = FUSED_OPS_RESULT_SCALAR0; }
+#endif
             _sub_group_block_write((__global unsigned int *)(&dst_write0[i
                                            * OC_BLOCK * MB_BLOCK]),
                     as_uint(blockC00[i]));
 #if OCB == 32
+#if HAS_FUSED_OPS
+            { FUSED_OPS_SCALAR1; blockC01[i] = FUSED_OPS_RESULT_SCALAR1; }
+#endif
             _sub_group_block_write(
                     (__global unsigned int *)(&dst_write0[i * OC_BLOCK
                             * MB_BLOCK
@@ -504,9 +516,15 @@ const float sum_scale = 1;
 #endif
         }
 #else
+#if HAS_FUSED_OPS
+        { FUSED_OPS_VEC0; blockC00 = FUSED_OPS_RESULT_VEC0; }
+#endif
         _sub_group_block_write8(
                 (__global unsigned int *)(&dst_write0[0]), as_uint8(blockC00));
 #if OCB == 32
+#if HAS_FUSED_OPS
+        { FUSED_OPS_VEC1; blockC01 = FUSED_OPS_RESULT_VEC1; }
+#endif
         _sub_group_block_write8((__global unsigned int *)(&dst_write0[OC_BLOCK
                 * MB_BLOCK * ODHW_SIZE]),
                 as_uint8(blockC01));
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.cpp
index c799fc6d37d..ff2154ad78a 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/convolution/convolution_kernel_b_fs_zyx_fsv16.cpp
@@ -223,6 +223,17 @@ bool ConvolutionKernel_b_fs_zyx_fsv16::Validate(const Params& p, const optional_
         return false;
     }
 
+    // Check if operation fusion is supported
+    if (!params.fused_ops.empty()) {
+        const bool is_1stconv = input.Feature().v == 3 && input.GetLayout() == DataLayout::bfzyx;
+        const bool ver_16mb16c = !is_1stconv && ((output.GetDType() == Datatype::F16 && output.Batch().v % 32 == 0) ||
+                                                 (output.GetDType() == Datatype::F32 && output.Batch().v % 16 == 0));
+
+        if (!ver_16mb16c && is_1stconv && output.GetDType() == Datatype::F16) {
+            return false;
+        }
+    }
+
     return true;
 }
 
@@ -310,7 +321,7 @@ JitConstants ConvolutionKernel_b_fs_zyx_fsv16::GetJitConstants(const convolution
             jit.Merge(MakeFusedOpsJitConstants(params, {conf_vec0, conf_vec1, conf_vec2, conf_vec3,
                                                         conf_scalar0, conf_scalar1, conf_scalar2, conf_scalar3}));
         }
-    } else if (!is_1stconv && !params.fused_ops.empty()) {
+    } else if ((!is_1stconv || output.GetDType() == Datatype::F32) && !params.fused_ops.empty()) {
         FusedOpsConfiguration conf_vec0 = GenerateFusedOpsConfiguration_f16(0, "blockC0", input_dt, true);
         FusedOpsConfiguration conf_vec1 = GenerateFusedOpsConfiguration_f16(1, "blockC0", input_dt, true);
         FusedOpsConfiguration conf_scalar0 = GenerateFusedOpsConfiguration_f16(0, "blockC0", input_dt, false);
diff --git a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
index f45cda3e6de..335c7459698 100644
--- a/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/fusions/convolution_fusion_test.cpp
@@ -361,6 +361,7 @@ public:
 #define CASE_CONV_FP32_13 { 1, 16, 18, 5, 4 }, { 1, 16, 16, 3, 2 }, { 1, 1, 3, 3, 3 }, { 1, 1, 1 }, { 0, 0, 0 }, { 1, 1, 1 }, 2, data_types::f32, format::b_fs_zyx_fsv16, data_types::f32, format::g_os_is_zyx_isv16_osv16, data_types::f32, format::bfzyx
 #define CASE_CONV_FP32_14 { 1, 3, 4, 5 }, { 1, 30, 2, 3 }, { 1, 1, 3, 3 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx, data_types::f32, format::bfyx
 #define CASE_CONV_FP32_15 { 1, 6, 4, 4 }, { 1, 16, 4, 4 }, { 1, 1, 3, 3 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, 1, data_types::f32, format::bfyx, data_types::f32, format::bfyx, data_types::f32, format::bfyx
+#define CASE_CONV_FP32_16 { 1, 3, 112, 112, 8 }, { 1, 16, 56, 56, 8 }, { 1, 1, 3, 3, 1 }, { 2, 2, 1 }, { 1, 1, 0 }, { 1, 1, 1 }, 1, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx, data_types::f32, format::bfzyx
 
 #define CASE_CONV_FP16_1 { 1, 15, 4, 5 }, { 1, 30, 2, 3 }, { 1, 1, 3, 3 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::f16, format::bfyx, data_types::f16, format::bfyx, data_types::f16, format::bfyx
@@ -378,6 +379,7 @@ public:
 #define CASE_CONV_FP16_13 { 16, 32, 4, 5 }, { 16, 64, 2, 3 }, { 1, 1, 3, 3 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::f16, format::fs_b_yx_fsv32, data_types::f16, format::bfyx, data_types::f16, format::bfyx
 #define CASE_CONV_FP16_14 { 1, 32, 55, 1 }, { 1, 32, 55, 1 }, { 1, 1, 3, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, 32, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::gs_oiyx_gsv16, data_types::f16, format::bfyx
 #define CASE_CONV_FP16_15 { 1, 39, 55, 1 }, { 1, 39, 55, 1 }, { 1, 1, 3, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, 39, data_types::f16, format::b_fs_yx_fsv16, data_types::f16, format::gs_oiyx_gsv16, data_types::f16, format::bfyx
+#define CASE_CONV_FP16_16 { 1, 3, 112, 112, 8 }, { 1, 32, 56, 56, 8 }, { 1, 1, 3, 3, 1 }, { 2, 2, 1 }, { 1, 1, 0 }, { 1, 1, 1 }, 1, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx, data_types::f16, format::bfzyx
 
 #define CASE_CONV_U8S8_1 { 1, 15, 4, 5 }, { 1, 30, 2, 3 }, { 1, 1, 3, 3 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::u8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
 #define CASE_CONV_U8S8_2 { 1, 15, 5, 5 }, { 1, 30, 3, 3 }, { 1, 1, 3, 3 }, { 1, 1 }, { 0, 0 }, { 1, 1 }, 1, data_types::u8, format::bfyx, data_types::i8, format::bfyx, data_types::f32, format::bfyx
@@ -3248,6 +3250,27 @@ INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_fp32_reorder_bfyx_to_fsv32_conv_data_
     convolution_test_params{ FSV32_CASE_CONV_FP32_1, 5, 5, 5 }
 }));
 
+class conv_gen9_common_conv_fwd_data_1stconv : public ConvFusingTest {};
+TEST_P(conv_gen9_common_conv_fwd_data_1stconv, basic) {
+    auto p = GetParam();
+    create_topologies(
+        input_layout("input", get_input_layout(p)),
+        data("weights", get_mem(get_weights_layout(p))),
+        data("bias", get_mem(get_bias_layout(p))),
+        convolution("conv_prim", input_info("input"), { "weights" }, { "bias" }, p.groups, p.stride, p.pad, p.dilation),
+        activation("activation", input_info("conv_prim"), activation_func::hswish),
+        reorder("reorder_bfyx", input_info("activation"), p.default_format, data_types::f32)
+    );
+
+    tolerance = default_tolerance(p.default_type);
+    execute(p);
+}
+
+INSTANTIATE_TEST_SUITE_P(fusings_gpu, conv_gen9_common_conv_fwd_data_1stconv, ::testing::ValuesIn(std::vector<convolution_test_params>{
+    convolution_test_params{ CASE_CONV_FP32_16, 2, 2, 3 },
+    convolution_test_params{ CASE_CONV_FP16_16, 2, 2, 3 },
+}));
+
 #ifdef ENABLE_ONEDNN_FOR_GPU
 class conv_fp16_prelu_onednn : public WeightsPrimitiveFusingTestOneDNN {};
 TEST_P(conv_fp16_prelu_onednn, basic_activation_eltwise) {