[GPU] Change priority of CPU implementations (#17829)

This commit is contained in:
Sergey Shlyapnikov 2023-06-05 11:21:26 +04:00 committed by GitHub
parent a9ddc2b553
commit db8d23231a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 31 deletions

View File

@@ -16,15 +16,15 @@
namespace cldnn {
template <typename T, typename U>
class singleton_map : public std::map<T, U> {
singleton_map() : std::map<T, U>() {}
singleton_map(singleton_map const&) = delete;
void operator=(singleton_map const&) = delete;
template <typename T>
class singleton_list : public std::vector<T> {
singleton_list() : std::vector<T>() {}
singleton_list(singleton_list const&) = delete;
void operator=(singleton_list const&) = delete;
public:
static singleton_map& instance() {
static singleton_map instance_;
static singleton_list& instance() {
static singleton_list instance_;
return instance_;
}
};
@@ -47,20 +47,20 @@ public:
using key_builder = implementation_key;
using key_type = typename key_builder::type;
using factory_type = std::function<std::unique_ptr<primitive_impl>(const typed_program_node<primitive_kind>&, const kernel_impl_params&)>;
using map_type = singleton_map<std::pair<impl_types, shape_types>, std::pair<std::set<key_type>, factory_type>>;
using list_type = singleton_list<std::tuple<impl_types, shape_types, std::set<key_type>, factory_type>>;
static factory_type get(const kernel_impl_params& impl_params, impl_types preferred_impl_type, shape_types target_shape_type) {
auto input_layout = !impl_params.input_layouts.empty() ? impl_params.input_layouts[0] : layout{ov::PartialShape{}, data_types::f32, format::any};
auto key = key_builder()(input_layout);
for (auto& kv : map_type::instance()) {
impl_types impl_type = kv.first.first;
shape_types supported_shape_type = kv.first.second;
for (auto& kv : list_type::instance()) {
impl_types impl_type = std::get<0>(kv);
shape_types supported_shape_type = std::get<1>(kv);
if ((preferred_impl_type & impl_type) != impl_type)
continue;
if ((target_shape_type & supported_shape_type) != target_shape_type)
continue;
std::set<key_type>& keys_set = kv.second.first;
auto& factory = kv.second.second;
std::set<key_type>& keys_set = std::get<2>(kv);
auto& factory = std::get<3>(kv);
if (keys_set.empty() || keys_set.find(key) != keys_set.end()) {
return factory;
}
@@ -85,14 +85,14 @@ public:
}
static bool check_key(impl_types target_impl_type, key_type key, shape_types target_shape_type) {
for (auto& kv : map_type::instance()) {
impl_types impl_type = kv.first.first;
shape_types supported_shape_type = kv.first.second;
for (auto& kv : list_type::instance()) {
impl_types impl_type = std::get<0>(kv);
shape_types supported_shape_type = std::get<1>(kv);
if ((target_impl_type & impl_type) != impl_type)
continue;
if ((target_shape_type & supported_shape_type) != target_shape_type)
continue;
std::set<key_type>& keys_set = kv.second.first;
std::set<key_type>& keys_set = std::get<2>(kv);
if (keys_set.empty())
return true;
return keys_set.find(key) != keys_set.end();
@@ -117,7 +117,7 @@ public:
static void add(impl_types impl_type, shape_types shape_type, factory_type factory, std::set<key_type> keys) {
OPENVINO_ASSERT(impl_type != impl_types::any, "[GPU] Can't register impl with type any");
map_type::instance().insert({{impl_type, shape_type}, {keys, factory}});
list_type::instance().push_back({impl_type, shape_type, keys, factory});
}
static std::set<key_type> combine(const std::vector<data_types>& types, const std::vector<format::type>& formats) {
@@ -133,22 +133,22 @@ public:
struct WeightsReordersFactory {
using factory_type = std::function<std::unique_ptr<primitive_impl>(const kernel_impl_params&)>;
using map_type = singleton_map<std::pair<impl_types, shape_types>, factory_type>;
using list_type = singleton_list<std::tuple<impl_types, shape_types, factory_type>>;
static void add(impl_types impl_type, shape_types shape_type, factory_type factory) {
OPENVINO_ASSERT(impl_type != impl_types::any, "[GPU] Can't register WeightsReordersFactory with type any");
map_type::instance().insert({{impl_type, shape_type}, factory});
list_type::instance().push_back({impl_type, shape_type, factory});
}
static factory_type get(impl_types preferred_impl_type, shape_types target_shape_type) {
for (auto& kv : map_type::instance()) {
impl_types impl_type = kv.first.first;
shape_types supported_shape_type = kv.first.second;
for (auto& kv : list_type::instance()) {
impl_types impl_type = std::get<0>(kv);
shape_types supported_shape_type = std::get<1>(kv);
if ((preferred_impl_type & impl_type) != impl_type)
continue;
if ((target_shape_type & supported_shape_type) != target_shape_type)
continue;
return kv.second;
return std::get<2>(kv);
}
OPENVINO_THROW("[GPU] WeightsReordersFactory doesn't have any implementation for "
" impl_type: ", preferred_impl_type, ", shape_type: ", target_shape_type);

View File

@@ -177,14 +177,18 @@ void program::init_program() {
}
void program::init_primitives() {
// Register implementations in order of their selection priority: common, OCL, oneDNN, CPU
// We register OCL implementation before oneDNN, because oneDNN is not always preferable (in case of iGPU)
// This order will only apply to primitives with preferable implementation type equal to impl_types::any
static bool is_initialized = false;
if (!is_initialized) {
common::register_implementations();
cpu::register_implementations();
ocl::register_implementations();
#ifdef ENABLE_ONEDNN_FOR_GPU
onednn::register_implementations();
#endif
cpu::register_implementations();
is_initialized = true;
}
}

View File

@@ -5,6 +5,7 @@
#include "test_utils.h"
#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/detection_output.hpp>
using namespace cldnn;
@@ -429,8 +430,12 @@ public:
top_k, eta, code_type, variance_encoded_in_target, confidence_threshold, prior_info_size,
prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id
));
topology.add(reorder("output_reorder", input_info("detection_output"), format::bfyx, type_to_data_type<T>::value));
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"detection_output", {format::bfyx, "", impl_types::cpu}}}));
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@@ -439,7 +444,7 @@ public:
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "detection_output");
ASSERT_EQ(outputs.begin()->first, "output_reorder");
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().batch(), 1);
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().feature(), 1);
@@ -685,8 +690,12 @@ public:
topology.add(reorder("input_confidence_padded", input_info("input_confidence"), input_location->get_layout().with_padding(padding{ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } })));
topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
topology.add(reorder("output_reorder", input_info("detection_output"), format::bfyx, type_to_data_type<T>::value));
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"detection_output", {format::bfyx, "", impl_types::cpu}}}));
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@@ -695,7 +704,7 @@ public:
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "detection_output");
ASSERT_EQ(outputs.begin()->first, "output_reorder");
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().batch(), 1);
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().feature(), 1);
@@ -742,6 +751,7 @@ public:
topology.add(input_layout("input_prior_box", input_prior_box->get_layout()));
topology.add(reorder("input_location_padded", input_info("input_location"), input_location->get_layout().with_padding(padding{ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } })));
topology.add(reorder("input_confidence_padded", input_info("input_confidence"), input_location->get_layout().with_padding(padding{ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } })));
topology.add(reorder("output_reorder", input_info("detection_output"), format::bfyx, type_to_data_type<T>::value));
topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"),
this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k,
@@ -749,7 +759,10 @@ public:
prior_is_normalized, this->img_size, this->img_size
));
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
auto config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"detection_output", {format::bfyx, "", impl_types::cpu}}}));
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input_location", input_location);
network->set_input_data("input_confidence", input_confidence);
@@ -758,7 +771,7 @@ public:
auto outputs = network->execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "detection_output");
ASSERT_EQ(outputs.begin()->first, "output_reorder");
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().batch(), 1);
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().feature(), 1);