[GPU] Change priority of CPU implementations (#17829)
This commit is contained in:
parent
a9ddc2b553
commit
db8d23231a
@ -16,15 +16,15 @@
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
template <typename T, typename U>
|
||||
class singleton_map : public std::map<T, U> {
|
||||
singleton_map() : std::map<T, U>() {}
|
||||
singleton_map(singleton_map const&) = delete;
|
||||
void operator=(singleton_map const&) = delete;
|
||||
template <typename T>
|
||||
class singleton_list : public std::vector<T> {
|
||||
singleton_list() : std::vector<T>() {}
|
||||
singleton_list(singleton_list const&) = delete;
|
||||
void operator=(singleton_list const&) = delete;
|
||||
|
||||
public:
|
||||
static singleton_map& instance() {
|
||||
static singleton_map instance_;
|
||||
static singleton_list& instance() {
|
||||
static singleton_list instance_;
|
||||
return instance_;
|
||||
}
|
||||
};
|
||||
@ -47,20 +47,20 @@ public:
|
||||
using key_builder = implementation_key;
|
||||
using key_type = typename key_builder::type;
|
||||
using factory_type = std::function<std::unique_ptr<primitive_impl>(const typed_program_node<primitive_kind>&, const kernel_impl_params&)>;
|
||||
using map_type = singleton_map<std::pair<impl_types, shape_types>, std::pair<std::set<key_type>, factory_type>>;
|
||||
using list_type = singleton_list<std::tuple<impl_types, shape_types, std::set<key_type>, factory_type>>;
|
||||
|
||||
static factory_type get(const kernel_impl_params& impl_params, impl_types preferred_impl_type, shape_types target_shape_type) {
|
||||
auto input_layout = !impl_params.input_layouts.empty() ? impl_params.input_layouts[0] : layout{ov::PartialShape{}, data_types::f32, format::any};
|
||||
auto key = key_builder()(input_layout);
|
||||
for (auto& kv : map_type::instance()) {
|
||||
impl_types impl_type = kv.first.first;
|
||||
shape_types supported_shape_type = kv.first.second;
|
||||
for (auto& kv : list_type::instance()) {
|
||||
impl_types impl_type = std::get<0>(kv);
|
||||
shape_types supported_shape_type = std::get<1>(kv);
|
||||
if ((preferred_impl_type & impl_type) != impl_type)
|
||||
continue;
|
||||
if ((target_shape_type & supported_shape_type) != target_shape_type)
|
||||
continue;
|
||||
std::set<key_type>& keys_set = kv.second.first;
|
||||
auto& factory = kv.second.second;
|
||||
std::set<key_type>& keys_set = std::get<2>(kv);
|
||||
auto& factory = std::get<3>(kv);
|
||||
if (keys_set.empty() || keys_set.find(key) != keys_set.end()) {
|
||||
return factory;
|
||||
}
|
||||
@ -85,14 +85,14 @@ public:
|
||||
}
|
||||
|
||||
static bool check_key(impl_types target_impl_type, key_type key, shape_types target_shape_type) {
|
||||
for (auto& kv : map_type::instance()) {
|
||||
impl_types impl_type = kv.first.first;
|
||||
shape_types supported_shape_type = kv.first.second;
|
||||
for (auto& kv : list_type::instance()) {
|
||||
impl_types impl_type = std::get<0>(kv);
|
||||
shape_types supported_shape_type = std::get<1>(kv);
|
||||
if ((target_impl_type & impl_type) != impl_type)
|
||||
continue;
|
||||
if ((target_shape_type & supported_shape_type) != target_shape_type)
|
||||
continue;
|
||||
std::set<key_type>& keys_set = kv.second.first;
|
||||
std::set<key_type>& keys_set = std::get<2>(kv);
|
||||
if (keys_set.empty())
|
||||
return true;
|
||||
return keys_set.find(key) != keys_set.end();
|
||||
@ -117,7 +117,7 @@ public:
|
||||
|
||||
static void add(impl_types impl_type, shape_types shape_type, factory_type factory, std::set<key_type> keys) {
|
||||
OPENVINO_ASSERT(impl_type != impl_types::any, "[GPU] Can't register impl with type any");
|
||||
map_type::instance().insert({{impl_type, shape_type}, {keys, factory}});
|
||||
list_type::instance().push_back({impl_type, shape_type, keys, factory});
|
||||
}
|
||||
|
||||
static std::set<key_type> combine(const std::vector<data_types>& types, const std::vector<format::type>& formats) {
|
||||
@ -133,22 +133,22 @@ public:
|
||||
|
||||
struct WeightsReordersFactory {
|
||||
using factory_type = std::function<std::unique_ptr<primitive_impl>(const kernel_impl_params&)>;
|
||||
using map_type = singleton_map<std::pair<impl_types, shape_types>, factory_type>;
|
||||
using list_type = singleton_list<std::tuple<impl_types, shape_types, factory_type>>;
|
||||
static void add(impl_types impl_type, shape_types shape_type, factory_type factory) {
|
||||
OPENVINO_ASSERT(impl_type != impl_types::any, "[GPU] Can't register WeightsReordersFactory with type any");
|
||||
map_type::instance().insert({{impl_type, shape_type}, factory});
|
||||
list_type::instance().push_back({impl_type, shape_type, factory});
|
||||
}
|
||||
|
||||
static factory_type get(impl_types preferred_impl_type, shape_types target_shape_type) {
|
||||
for (auto& kv : map_type::instance()) {
|
||||
impl_types impl_type = kv.first.first;
|
||||
shape_types supported_shape_type = kv.first.second;
|
||||
for (auto& kv : list_type::instance()) {
|
||||
impl_types impl_type = std::get<0>(kv);
|
||||
shape_types supported_shape_type = std::get<1>(kv);
|
||||
if ((preferred_impl_type & impl_type) != impl_type)
|
||||
continue;
|
||||
if ((target_shape_type & supported_shape_type) != target_shape_type)
|
||||
continue;
|
||||
|
||||
return kv.second;
|
||||
return std::get<2>(kv);
|
||||
}
|
||||
OPENVINO_THROW("[GPU] WeightsReordersFactory doesn't have any implementation for "
|
||||
" impl_type: ", preferred_impl_type, ", shape_type: ", target_shape_type);
|
||||
|
@ -177,14 +177,18 @@ void program::init_program() {
|
||||
}
|
||||
|
||||
void program::init_primitives() {
|
||||
// Register implementations in order of their selection priority: common, OCL, oneDNN, CPU
|
||||
// We register OCL implementation before oneDNN, because oneDNN is not always preferable (in case of iGPU)
|
||||
// This order will only apply to primitives whose preferred implementation type is equal to impl_types::any
|
||||
|
||||
static bool is_initialized = false;
|
||||
if (!is_initialized) {
|
||||
common::register_implementations();
|
||||
cpu::register_implementations();
|
||||
ocl::register_implementations();
|
||||
#ifdef ENABLE_ONEDNN_FOR_GPU
|
||||
onednn::register_implementations();
|
||||
#endif
|
||||
cpu::register_implementations();
|
||||
is_initialized = true;
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "test_utils.h"
|
||||
|
||||
#include <intel_gpu/primitives/input_layout.hpp>
|
||||
#include <intel_gpu/primitives/reorder.hpp>
|
||||
#include <intel_gpu/primitives/detection_output.hpp>
|
||||
|
||||
using namespace cldnn;
|
||||
@ -429,8 +430,12 @@ public:
|
||||
top_k, eta, code_type, variance_encoded_in_target, confidence_threshold, prior_info_size,
|
||||
prior_coordinates_offset, prior_is_normalized, input_width, input_height, decrease_label_id
|
||||
));
|
||||
topology.add(reorder("output_reorder", input_info("detection_output"), format::bfyx, type_to_data_type<T>::value));
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
|
||||
auto config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"detection_output", {format::bfyx, "", impl_types::cpu}}}));
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -439,7 +444,7 @@ public:
|
||||
auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "detection_output");
|
||||
ASSERT_EQ(outputs.begin()->first, "output_reorder");
|
||||
|
||||
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().batch(), 1);
|
||||
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().feature(), 1);
|
||||
@ -685,8 +690,12 @@ public:
|
||||
topology.add(reorder("input_confidence_padded", input_info("input_confidence"), input_location->get_layout().with_padding(padding{ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } })));
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"), this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
|
||||
topology.add(reorder("output_reorder", input_info("detection_output"), format::bfyx, type_to_data_type<T>::value));
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
|
||||
auto config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"detection_output", {format::bfyx, "", impl_types::cpu}}}));
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -695,7 +704,7 @@ public:
|
||||
auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "detection_output");
|
||||
ASSERT_EQ(outputs.begin()->first, "output_reorder");
|
||||
|
||||
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().batch(), 1);
|
||||
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().feature(), 1);
|
||||
@ -742,6 +751,7 @@ public:
|
||||
topology.add(input_layout("input_prior_box", input_prior_box->get_layout()));
|
||||
topology.add(reorder("input_location_padded", input_info("input_location"), input_location->get_layout().with_padding(padding{ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } })));
|
||||
topology.add(reorder("input_confidence_padded", input_info("input_confidence"), input_location->get_layout().with_padding(padding{ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } })));
|
||||
topology.add(reorder("output_reorder", input_info("detection_output"), format::bfyx, type_to_data_type<T>::value));
|
||||
|
||||
topology.add(detection_output("detection_output", input_info("input_location_padded"), input_info("input_confidence_padded"), input_info("input_prior_box"),
|
||||
this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k,
|
||||
@ -749,7 +759,10 @@ public:
|
||||
prior_is_normalized, this->img_size, this->img_size
|
||||
));
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, get_test_default_config(engine), get_test_stream_ptr(), is_caching_test);
|
||||
auto config = get_test_default_config(engine);
|
||||
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"detection_output", {format::bfyx, "", impl_types::cpu}}}));
|
||||
|
||||
cldnn::network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
|
||||
|
||||
network->set_input_data("input_location", input_location);
|
||||
network->set_input_data("input_confidence", input_confidence);
|
||||
@ -758,7 +771,7 @@ public:
|
||||
auto outputs = network->execute();
|
||||
|
||||
ASSERT_EQ(outputs.size(), size_t(1));
|
||||
ASSERT_EQ(outputs.begin()->first, "detection_output");
|
||||
ASSERT_EQ(outputs.begin()->first, "output_reorder");
|
||||
|
||||
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().batch(), 1);
|
||||
ASSERT_EQ(outputs.begin()->second.get_memory()->get_layout().feature(), 1);
|
||||
|
Loading…
Reference in New Issue
Block a user