[GPU] Add functionality of debug config ForceImpltype (#11884)

+ Force one layer using id + Add do, reduce to the primitive list Signed-off-by: Min, Byungil <byungil.min@intel.com>
2022-06-17 15:03:34 +09:00 · 2022-06-17 15:03:34 +09:00 · d3ca2f8cf1
commit d3ca2f8cf1
parent 49c1a25e0f
1 changed files with 36 additions and 6 deletions
--- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
+++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@ -1373,11 +1373,41 @@ impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node)
 #ifdef GPU_DEBUG_CONFIG
    GPU_DEBUG_GET_INSTANCE(debug_config);
    GPU_DEBUG_IF(!debug_config->forced_impl_type.empty()) {
+        // Forcing impl type of one primitive
+        std::string forced_impl_type = debug_config->forced_impl_type;
        if (node.is_type<fully_connected>()) {
-            if (debug_config->forced_impl_type == "fc:ocl")
+            if (forced_impl_type == "fc:ocl")
                return impl_types::ocl;
-            else if (debug_config->forced_impl_type == "fc:onednn")
+            else if (forced_impl_type == "fc:onednn")
                return impl_types::onednn;
+        } else if (node.is_type<detection_output>()) {
+            if (forced_impl_type == "do:cpu")
+                return impl_types::cpu;
+            else if (forced_impl_type == "do:ocl")
+                return impl_types::ocl;
+        } else if (node.is_type<reduce>()) {
+            if (forced_impl_type == "reduce:ocl")
+                return impl_types::ocl;
+            else if (forced_impl_type == "reduce:onednn")
+                return impl_types::onednn;
+        }
+
+        // Forcing one layer
+        size_t found_type = forced_impl_type.rfind(":");
+        if (found_type != std::string::npos) {
+            impl_types preferred_type = impl_types::any;
+            auto impl_type = forced_impl_type.substr(found_type + 1);
+            if (impl_type == "ocl")
+                preferred_type = impl_types::ocl;
+            else if (impl_type == "onednn")
+                preferred_type = impl_types::onednn;
+            else if (impl_type == "cpu")
+                preferred_type = impl_types::cpu;
+
+            if (node.id() == forced_impl_type.substr(0, found_type)) {
+                std::cout << "  >>> " << forced_impl_type.substr(0, found_type) << " : " << forced_impl_type.substr(found_type + 1) << std::endl;
+                return preferred_type;
+            }
        }
    }
 #endif
@ -1387,6 +1417,10 @@ impl_types layout_optimizer::get_forced_impl_type_by_config(program_node& node)

 impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format preferred_format) {
    impl_types preferred_impl = impl_types::any;
+    auto forced_impl = get_forced_impl_type_by_config(node);
+    if (forced_impl != impl_types::any)
+        return forced_impl;
+
    if (!_forcing_map.empty() && _forcing_map.count(node.id()) != 0) {
        preferred_impl = _forcing_map.at(node.id()).second;
    } else if (node.is_type<detection_output>()) {
@ -1563,10 +1597,6 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format
        }
    // TODO: uncomment this code when onednn gemm implementations will have real perf improvements vs cldnn
    } else if (node.is_type<fully_connected>()/* || node.is_type<gemm>()*/) {
-        auto forced_impl = get_forced_impl_type_by_config(node);
-        if (forced_impl != impl_types::any)
-            return forced_impl;
-
        if (!_optimization_attributes.use_onednn_impls)
            return impl_types::ocl;