[GPU] Fix custom layer (#20220)

2023-10-04 10:18:34 +04:00 · 2023-10-04 10:18:34 +04:00 · 0ee0b4d956
commit 0ee0b4d956
parent 749ed9dec7
5 changed files with 120 additions and 1 deletions
--- a/src/plugins/intel_gpu/src/plugin/ops/custom.cpp
+++ b/src/plugins/intel_gpu/src/plugin/ops/custom.cpp
@ -229,6 +229,7 @@ void CreateCustomOp(ProgramBuilder& p, const std::shared_ptr<ov::Node>& op, Cust
                                                  outputLayout,
                                                  gws,
                                                  lws);
+    p.add_primitive(*op, customPrim);

    auto prevLayerName = genericLayerName;
    if (outputLayout.format != cldnn::format::any) {
@ -240,7 +241,6 @@ void CreateCustomOp(ProgramBuilder& p, const std::shared_ptr<ov::Node>& op, Cust
                                            customPrim.output_layout.data_type));
        prevLayerName = reorderPrimName;
    }
-    p.add_primitive(*op, customPrim);
 }

 }  // namespace intel_gpu
--- a/src/plugins/intel_gpu/tests/functional/CMakeLists.txt
+++ b/src/plugins/intel_gpu/tests/functional/CMakeLists.txt
@ -12,6 +12,8 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    ov_add_compiler_flags(/wd4305)
 endif()

+# Source-tree location of the custom-op XML config, handed to the test sources as a compile definition.
+list(APPEND DEFINES TEST_CUSTOM_OP_CONFIG_PATH="${CMAKE_CURRENT_SOURCE_DIR}/custom_op/custom_op.xml")
+
 addIeTargetTest(
        NAME
            ${TARGET_NAME}
@ -23,6 +25,8 @@ addIeTargetTest(
            ${CMAKE_CURRENT_SOURCE_DIR}
            $<TARGET_PROPERTY:openvino_intel_gpu_plugin,SOURCE_DIR>/include/
            ${TEST_COMMON_INCLUDE_DIR}
+        DEFINES
+            ${DEFINES}
        DEPENDENCIES
            openvino_intel_gpu_plugin
        LINK_LIBRARIES
--- a/src/plugins/intel_gpu/tests/functional/custom_op/custom_op.cl
+++ b/src/plugins/intel_gpu/tests/functional/custom_op/custom_op.cl
@ -0,0 +1,5 @@
+// Element-wise affine transform: each work item handles the element selected by its global id in
+// dimension 0. `alpha` and `beta` are not kernel arguments; they must be defined for the program
+// at build time (custom_op.xml declares matching <Define> entries).
+__kernel void custom_kernel(__global const INPUT0_TYPE* input, __global OUTPUT0_TYPE* output) {
+    const uint idx = get_global_id(0);
+    output[idx] = input[idx] * alpha + beta;
+}
--- a/src/plugins/intel_gpu/tests/functional/custom_op/custom_op.cpp
+++ b/src/plugins/intel_gpu/tests/functional/custom_op/custom_op.cpp
@ -0,0 +1,97 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <string>
+#include <utility>
+#include <vector>
+#include <memory>
+
+#include "openvino/core/any.hpp"
+#include "openvino/runtime/core.hpp"
+#include "openvino/runtime/exec_model_info.hpp"
+#include "openvino/runtime/properties.hpp"
+
+#include "base/ov_behavior_test_utils.hpp"
+
+using namespace ::testing;
+
+namespace ov {
+namespace test {
+namespace intel_gpu {
+
+/// Minimal user-defined operation used by the GPU custom-layer tests.
+///
+/// Computes `out = in * alpha + beta` element-wise. The single output keeps the element type and
+/// partial shape of input 0. The reference `evaluate()` accesses tensor data as `float`.
+class CustomOp : public ov::op::Op {
+private:
+    // In-class initializers keep a default-constructed op from exposing indeterminate values
+    // through visit_attributes()/evaluate(). The values mirror the `default` attributes of the
+    // <Define> entries in custom_op.xml.
+    float m_alpha = 1.0f;
+    float m_beta = 0.0f;
+
+public:
+    OPENVINO_OP("CustomOp");
+
+    CustomOp() = default;
+
+    /// @param input  Output connected to the single input port of the op.
+    /// @param alpha  Multiplier applied to every element.
+    /// @param beta   Offset added to every element.
+    CustomOp(const ov::Output<ov::Node>& input, float alpha, float beta) : Op({input}), m_alpha(alpha), m_beta(beta) {
+        constructor_validate_and_infer_types();
+    }
+
+    /// Declares one output with the element type and partial shape of input 0.
+    void validate_and_infer_types() override {
+        set_output_size(1);
+        set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
+    }
+
+    /// Exposes the "alpha" and "beta" attributes to the visitor.
+    bool visit_attributes(ov::AttributeVisitor& visitor) override {
+        visitor.on_attribute("alpha", m_alpha);
+        visitor.on_attribute("beta", m_beta);
+        return true;
+    }
+
+    /// Creates a copy of the op attached to `inputs[0]`, carrying over alpha and beta.
+    /// @throws std::out_of_range if `inputs` is empty.
+    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& inputs) const override {
+        // at() instead of operator[]: an empty vector is reported instead of being undefined behavior.
+        return std::make_shared<CustomOp>(inputs.at(0), m_alpha, m_beta);
+    }
+
+    bool has_evaluate() const override {
+        return true;
+    }
+
+    /// Reference implementation: resizes `outputs[0]` to the shape of `inputs[0]` and writes
+    /// `in[i] * alpha + beta` for every element.
+    /// @return always true.
+    /// @throws std::out_of_range if `inputs` or `outputs` is empty.
+    bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override {
+        const auto& in = inputs.at(0);
+        auto& out = outputs.at(0);
+        out.set_shape(in.get_shape());
+
+        // Data pointers are fetched once, and only after set_shape(), in case resizing replaces
+        // the output buffer.
+        const float* src = in.data<float>();
+        float* dst = out.data<float>();
+        const size_t count = out.get_size();
+        for (size_t i = 0; i < count; ++i) {
+            dst[i] = src[i] * m_alpha + m_beta;
+        }
+        return true;
+    }
+};
+
+/// Builds the test model: Parameter(f32, {1, 2, 3, 4}) -> CustomOp(alpha = 1, beta = 2) -> Result.
+static std::shared_ptr<ov::Model> get_simple_model_with_custom_op() {
+    const ov::PartialShape input_shape{1, 2, 3, 4};
+    const auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, input_shape);
+    const auto custom = std::make_shared<CustomOp>(input, 1.0f, 2.0f);
+    const auto output = std::make_shared<ov::op::v0::Result>(custom);
+
+    const ov::ResultVector results{output};
+    const ov::ParameterVector parameters{input};
+    return std::make_shared<ov::Model>(results, parameters, "model_with_custom_op");
+}
+
+// Smoke test: setting "CONFIG_FILE" to the test custom-op config on the GPU device must not throw.
+TEST(CustomOp, CanReadValidCustomOpConfig) {
+    ov::Core core;
+    const ov::AnyMap gpu_properties{{"CONFIG_FILE", TEST_CUSTOM_OP_CONFIG_PATH}};
+    core.set_property(ov::test::utils::DEVICE_GPU, gpu_properties);
+}
+
+// Compiles the single-CustomOp model for GPU with f32 inference precision and checks that the
+// runtime graph consists of exactly Input -> CustomGPUPrimitive -> Result, i.e. no additional
+// node (such as a reorder) surrounds the custom primitive.
+TEST(CustomOp, NoRedundantReordersInserted) {
+    ov::Core core;
+    auto model = get_simple_model_with_custom_op();
+    ov::AnyMap config = { ov::hint::inference_precision(ov::element::f32), {"CONFIG_FILE", TEST_CUSTOM_OP_CONFIG_PATH}};
+    auto compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU, config);
+
+    const auto runtime_graph = compiled_model.get_runtime_model();
+    const auto ops = runtime_graph->get_ordered_ops();
+
+    // Expected layer types in execution order. Comparing against the table's size keeps both
+    // sides of the count check unsigned.
+    const std::vector<std::string> expected_layer_types = {"Input", "CustomGPUPrimitive", "Result"};
+    ASSERT_EQ(ops.size(), expected_layer_types.size());
+    for (size_t i = 0; i < expected_layer_types.size(); ++i) {
+        const auto& layer_type = ops[i]->get_rt_info()[ov::exec_model_info::LAYER_TYPE].as<std::string>();
+        ASSERT_STREQ(layer_type.c_str(), expected_layer_types[i].c_str()) << "runtime op index " << i;
+    }
+}
+
+} // namespace intel_gpu
+} // namespace test
+} // namespace ov
--- a/src/plugins/intel_gpu/tests/functional/custom_op/custom_op.xml
+++ b/src/plugins/intel_gpu/tests/functional/custom_op/custom_op.xml
@ -0,0 +1,13 @@
+<CustomLayer name="CustomOp" type="SimpleGPU" version="1">
+    <Kernel entry="custom_kernel">
+        <Source filename="custom_op.cl"/>
+        <!-- alpha and beta are the identifiers used inside the kernel body; presumably each is taken from the op attribute named by `param`, falling back to `default` (confirm against the plugin's config loader) -->
+        <Define name="alpha" type="float" param="alpha" default="1.0"/>
+        <Define name="beta" type="float" param="beta" default="0.0"/>
+    </Kernel>
+    <Buffers><!-- arg-index follows the parameter order of the kernel entry function, so in this case (input, output) -->
+        <Tensor arg-index="0" type="input" port-index="0"/>
+        <Tensor arg-index="1" type="output" port-index="0" format="BFYX"/>
+    </Buffers>
+    <CompilerOptions options="-cl-mad-enable"/>
+    <!-- the kernel indexes with get_global_id(0) only, so the whole range is placed in dimension 0 -->
+    <WorkSizes global="B*F*Y*X,1,1"/>
+</CustomLayer>