diff --git a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp
index e816449b461..6cefa8ef5ed 100644
--- a/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_remote_context.cpp
@@ -233,7 +233,12 @@ CLDNNExecutionContextImpl::CLDNNExecutionContextImpl(const std::shared_ptr<IInferencePlugin> plugin,
-    cldnn::queue_types queue_type = cldnn::queue_types::out_of_order;
+    cldnn::queue_types queue_type;
+    if (dev->get_info().supports_immad)
+        queue_type = cldnn::queue_types::in_order;
+    else
+        queue_type = cldnn::queue_types::out_of_order;
+
     bool use_unified_shared_memory = true;
     m_engine = cldnn::engine::create(engine_type, runtime_type, dev,
                                      cldnn::engine_configuration(enable_profiling, queue_type,
diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp
new file mode 100644
index 00000000000..fffc039aefb
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/concatenation_onednn.cpp
@@ -0,0 +1,29 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "concatenation_inst.h"
+#include "eltwise_inst.h"
+#include "quantize_inst.h"
+#include "primitive_onednn_base.h"
+#include "impls/implementation_map.hpp"
+
+#include "kernel_selector_common.h"
+
+#include <oneapi/dnnl/dnnl.hpp>
+
+#include <algorithm>
+#include <memory>
+namespace cldnn {
+namespace onednn {
+
+
+
+namespace detail {
+
+attach_concatenation_onednn::attach_concatenation_onednn() {
+}
+
+} // namespace detail
+} // namespace onednn
+} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/eltwise_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/eltwise_onednn.cpp
new file mode 100644
index 00000000000..22be0640f92
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/eltwise_onednn.cpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "eltwise_inst.h"
+#include "primitive_onednn_base.h"
+#include "impls/implementation_map.hpp"
+
+#include "kernel_selector_common.h"
+
+#include <oneapi/dnnl/dnnl.hpp>
+
+#include <algorithm>
+#include <memory>
+namespace cldnn {
+namespace onednn {
+
+namespace detail {
+
+attach_eltwise_onednn::attach_eltwise_onednn() {
+}
+
+} // namespace detail
+} // namespace onednn
+} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/fully_connected_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/fully_connected_onednn.cpp
new file mode 100644
index 00000000000..31eb7be2f2e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/fully_connected_onednn.cpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "fully_connected_inst.h"
+#include "primitive_onednn_base.h"
+#include "impls/implementation_map.hpp"
+
+#include "kernel_selector_common.h"
+
+#include <oneapi/dnnl/dnnl.hpp>
+
+#include <algorithm>
+#include <memory>
+namespace cldnn {
+namespace onednn {
+
+namespace detail {
+
+attach_fully_connected_onednn::attach_fully_connected_onednn() {
+}
+
+} // namespace detail
+} // namespace onednn
+} // namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/gemm_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/gemm_onednn.cpp
new file mode 100644
index 00000000000..62b5db638cc
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/gemm_onednn.cpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "gemm_inst.h"
+#include "primitive_onednn_base.h"
"primitive_onednn_base.h" +#include "impls/implementation_map.hpp" + +#include "kernel_selector_common.h" + +#include + +#include +#include +namespace cldnn { +namespace onednn { + +namespace detail { + +attach_gemm_onednn::attach_gemm_onednn() { +} + +} // namespace detail +} // namespace onednn +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/pooling_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/pooling_onednn.cpp new file mode 100644 index 00000000000..d865edd00fd --- /dev/null +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/pooling_onednn.cpp @@ -0,0 +1,26 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "pooling_inst.h" +#include "primitive_onednn_base.h" +#include "impls/implementation_map.hpp" + +#include "kernel_selector_common.h" + +#include + +#include +#include +namespace cldnn { +namespace onednn { + + +namespace detail { + +attach_pooling_onednn::attach_pooling_onednn() { +} + +} // namespace detail +} // namespace onednn +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/register.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/register.cpp index af9c898cacd..ae2b6af79e3 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/register.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/register.cpp @@ -12,7 +12,12 @@ namespace onednn { void register_implementations() { REGISTER_ONEDNN_IMPL(convolution); -} + REGISTER_ONEDNN_IMPL(concatenation); + REGISTER_ONEDNN_IMPL(eltwise); + REGISTER_ONEDNN_IMPL(gemm); + REGISTER_ONEDNN_IMPL(pooling); + REGISTER_ONEDNN_IMPL(reorder); + REGISTER_ONEDNN_IMPL(fully_connected);} } // namespace onednn } // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/register.hpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/register.hpp index cee12eaeeac..ce54f08807f 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/register.hpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/register.hpp @@ -18,6 +18,12 @@ namespace detail { } REGISTER_ONEDNN_IMPL(convolution); +REGISTER_ONEDNN_IMPL(concatenation); +REGISTER_ONEDNN_IMPL(eltwise); +REGISTER_ONEDNN_IMPL(gemm); +REGISTER_ONEDNN_IMPL(pooling); +REGISTER_ONEDNN_IMPL(reorder); +REGISTER_ONEDNN_IMPL(fully_connected); #undef REGISTER_ONEDNN_IMPL diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/reorder_onednn.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/reorder_onednn.cpp new file mode 100644 index 00000000000..31a09be251c --- /dev/null +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/reorder_onednn.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reorder_inst.h" +#include "primitive_onednn_base.h" +#include "impls/implementation_map.hpp" + +#include "kernel_selector_common.h" + +#include + +#include +#include +namespace cldnn { +namespace onednn { + +struct reorder_onednn : typed_primitive_onednn_impl { + using parent = typed_primitive_onednn_impl; + using parent::parent; + +protected: + std::unique_ptr clone() const override { + return make_unique(*this); + } + + std::unordered_map get_arguments(reorder_inst& instance) const override { + std::unordered_map args; + + int input_idx = DNNL_ARG_FROM; + for (size_t i = 0; i < instance.inputs_memory_count(); i++) { + auto& input = instance.input_memory(i); + args.insert({input_idx++, input.get_onednn_memory(_pd.src_desc())}); + } + 
+ { + auto& output = instance.output_memory(); + args.insert({DNNL_ARG_TO, output.get_onednn_memory(_pd.dst_desc())}); + } + + return args; + } + + static std::shared_ptr get_reorder_descriptor(const reorder_node& arg) { + auto prim = arg.get_primitive(); + + auto& input = arg.get_dependency(0); + auto& engine = arg.get_program().get_engine(); + + auto input_md = onednn::layout_to_memory_desc(input.get_output_layout()); + auto output_md = onednn::layout_to_memory_desc(arg.get_output_layout()); + + return std::make_shared( + engine.get_onednn_engine(), + input_md, + engine.get_onednn_engine(), + output_md, + *get_primitive_attributes(arg)); + } + +public: + static primitive_impl* create(const reorder_node& arg) { + auto desc = get_reorder_descriptor(arg); + auto attr = get_primitive_attributes(arg); + + std::shared_ptr dummy = nullptr; + + return new reorder_onednn(arg, dummy, attr, *desc); + } +}; + +namespace detail { + +attach_reorder_onednn::attach_reorder_onednn() { + implementation_map::add(impl_types::onednn, reorder_onednn::create, {}); +} + +} // namespace detail +} // namespace onednn +} // namespace cldnn diff --git a/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp b/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp index a81a6799e60..8cbddc34d45 100644 --- a/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp +++ b/inference-engine/thirdparty/clDNN/src/impls/onednn/utils.cpp @@ -83,16 +83,16 @@ dnnl::memory::format_tag convert_data_format(cldnn::format fmt) { case cldnn::format::bfyx: return dnnl::memory::format_tag::nchw; case cldnn::format::bfzyx: return dnnl::memory::format_tag::ncdhw; case cldnn::format::byxf: return dnnl::memory::format_tag::nhwc; - // case cldnn::format::b_fs_yx_fsv16: return dnnl::memory::format_tag::nChw16c; - // case cldnn::format::b_fs_yx_fsv32: return dnnl::memory::format_tag::aBcd32b; - // case cldnn::format::b_fs_zyx_fsv16: return dnnl::memory::format_tag::nCdhw16c; - // case cldnn::format::b_fs_zyx_fsv32: return dnnl::memory::format_tag::aBcde32b; - // case cldnn::format::bs_fs_yx_bsv16_fsv16: return dnnl::memory::format_tag::NChw16n16c; - // case cldnn::format::bs_fs_yx_bsv32_fsv32: return dnnl::memory::format_tag::NChw32n32c; - // case cldnn::format::bs_fs_yx_bsv4_fsv4: return dnnl::memory::format_tag::ABcd4a4b; - // case cldnn::format::bs_fs_yx_bsv4_fsv2: return dnnl::memory::format_tag::ABcd4a2b; - // case cldnn::format::bs_fs_yx_bsv32_fsv16: return dnnl::memory::format_tag::NChw32n16c; - // case cldnn::format::bs_fs_zyx_bsv16_fsv16: return dnnl::memory::format_tag::NCdhw16n16c; + case cldnn::format::b_fs_yx_fsv16: return dnnl::memory::format_tag::nChw16c; + case cldnn::format::b_fs_yx_fsv32: return dnnl::memory::format_tag::aBcd32b; + case cldnn::format::b_fs_zyx_fsv16: return dnnl::memory::format_tag::nCdhw16c; + case cldnn::format::b_fs_zyx_fsv32: return dnnl::memory::format_tag::aBcde32b; + case cldnn::format::bs_fs_yx_bsv16_fsv16: return dnnl::memory::format_tag::NChw16n16c; + case cldnn::format::bs_fs_yx_bsv32_fsv32: return dnnl::memory::format_tag::NChw32n32c; + case cldnn::format::bs_fs_yx_bsv4_fsv4: return dnnl::memory::format_tag::ABcd4a4b; + case cldnn::format::bs_fs_yx_bsv4_fsv2: return dnnl::memory::format_tag::ABcd4a2b; + case cldnn::format::bs_fs_yx_bsv32_fsv16: return dnnl::memory::format_tag::NChw32n16c; + case cldnn::format::bs_fs_zyx_bsv16_fsv16: return dnnl::memory::format_tag::NCdhw16n16c; default: throw std::invalid_argument("[clDNN] Unsupported conversion from cldnn to ondnn layout " + 
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
index c14f6417395..0274cf730f0 100644
--- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
+++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
@@ -842,6 +842,48 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node) {
         const size_t kNStreams = static_cast<size_t>(node.get_program().get_engine().configuration().n_streams);
         const size_t kKeyValue = kBatchNum * std::min(kClassNum, static_cast<size_t>(8)) * kNStreams;
         preferred_impl = (kKeyValue > 64) ? impl_types::ocl : impl_types::cpu;
+    } else if (node.is_type<reorder>()) {
+        if (!node.get_program().get_engine().get_device_info().supports_immad)
+            return impl_types::ocl;
+
+        std::vector<format> onednn_optimized_fmt = {
+            format::bfyx,
+            format::b_fs_zyx_fsv16,
+            format::b_fs_yx_fsv16,
+            format::b_fs_yx_fsv32,
+            format::bs_fs_yx_bsv16_fsv16,
+            format::bs_fs_zyx_bsv16_fsv16,
+            format::bs_fs_yx_bsv32_fsv16,
+            format::bs_fs_yx_bsv32_fsv32,
+        };
+
+        auto input_layout = node.get_dependency(0).get_output_layout();
+        auto output_layout = node.get_output_layout();
+
+        auto input_fmt = input_layout.format;
+        auto output_fmt = output_layout.format;
+
+        preferred_impl = impl_types::onednn;
+
+        if (std::find(onednn_optimized_fmt.begin(), onednn_optimized_fmt.end(), input_fmt) == onednn_optimized_fmt.end() ||
+            std::find(onednn_optimized_fmt.begin(), onednn_optimized_fmt.end(), output_fmt) == onednn_optimized_fmt.end()) {
+            preferred_impl = impl_types::ocl;
+        }
+
+        // onednn doesn't support paddings
+        if (input_layout.data_padding || output_layout.data_padding) {
+            preferred_impl = impl_types::ocl;
+        }
+
+        // Native impl works faster for this type of reorder
+        if (input_layout.format == format::bfyx && output_layout.format == format::bfyx) {
+            preferred_impl = impl_types::ocl;
+        }
+
+        // onednn reorder doesn't support different number of dimensions in input and output layouts
+        if (input_layout.format.dimension() != output_layout.format.dimension()) {
+            preferred_impl = impl_types::ocl;
+        }
     }
 
     return preferred_impl;
diff --git a/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp b/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp
index 7704ea3a7f1..3859634e479 100644
--- a/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp
@@ -211,8 +211,9 @@ void dump_graph_init(std::ofstream& graph,
             !node->can_be_optimized()) {
             graph << "\\n Selected kernel: " << (node->get_selected_impl() == nullptr ? "none"
"none" - : node->get_selected_impl()->get_kernel_name()) + - "\n" + dump_mem_info(node); + : node->get_selected_impl()->get_kernel_name()) + " / " + << node->get_preferred_impl_type() + << "\n" + dump_mem_info(node); } graph << "\""; #ifdef __clang__ diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp index f2ce1cfbde5..32545853cf0 100644 --- a/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp +++ b/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp @@ -2394,3 +2394,57 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_REORDER, reorder_test, ::testing::ValuesIn(reorder_test::generate_specific_test_params()), tests::generic_test::custom_param_name_functor()); + + +#ifdef ENABLE_ONEDNN_FOR_GPU +TEST(reorder_onednn_gpu, basic_convert_int8) { + + auto& engine = get_onednn_test_engine(); + layout in_layout = { type_to_data_type::value, format::byxf, { 1, 1, 3, 3 } }; + layout byte_layout = { type_to_data_type::value, format::bfyx, { 1, 1, 3, 3 } }; + std::initializer_list input_f = { 1.0f, -2.6f, 3.1f, -4.0f, 5.03f, -6.99f, 7.0f, -8.0f, 9.0f }; + std::list final_results = { 1.0f, -3.0f, 3.0f, -4.0f, 5.0f, -7.0f, 7.0f, -8.0f, 9.0f }; + + // Allocate memory for input image. + auto input_memory = engine.allocate_memory(in_layout); + set_values(input_memory, input_f); + + // Create input_layout description + // "input" - is the primitive id inside topology + input_layout input("input", in_layout); + + topology topology( + // 1. input layout primitive. + input, + // 2. reorder primitive with id "reorder_input" + reorder("reorder_input", + // input primitive for reorder (implicitly converted to primitive_id) + input, + // output layout for reorder + byte_layout), + reorder("reorder2", "reorder_input", in_layout) + ); + + build_options options_target; + options_target.set_option(build_option::outputs({ "reorder_input", "reorder2"})); + implementation_desc impl = { format::bfyx, std::string(""), impl_types::onednn }; + options_target.set_option(build_option::force_implementations({{ "reorder_input", impl }})); + + network network( + engine, + topology, + options_target); + + network.set_input_data("input", input_memory); + + auto outputs = network.execute(); + + auto interm = outputs.at("reorder2").get_memory(); + cldnn::mem_lock interm_ptr(interm, get_test_stream()); + unsigned int cntr = 0; + for (const auto& exp : final_results) + { + EXPECT_EQ(exp, interm_ptr[cntr++]); + } +} +#endif