[GPU] Impls refactoring (#6603)
This commit is contained in:
parent
7be2b782ba
commit
788e76722f
@ -2,11 +2,10 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cldnn/runtime/engine.hpp"
|
||||
#include "cldnn/primitives/implementation_desc.hpp"
|
||||
|
||||
#include "topology.hpp"
|
||||
|
||||
@ -99,14 +98,6 @@ struct learning_params {
|
||||
learning_params() : momentum(0.9f), weights_decay(0.0005f) {}
|
||||
};
|
||||
|
||||
/// @brief Description of primitives implementation.
|
||||
struct implementation_desc {
|
||||
format::type output_format; ///< Output format.
|
||||
std::string kernel_name; ///< GPU kernel name.
|
||||
};
|
||||
|
||||
using implementation_forcing_map = std::map<primitive_id, implementation_desc>;
|
||||
|
||||
/// @brief Represents user-provided program build option.
|
||||
struct build_option {
|
||||
/// @brief Allow primitives fusing during program build (default: false).
|
||||
|
70
inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp
vendored
Normal file
70
inference-engine/thirdparty/clDNN/api/cldnn/primitives/implementation_desc.hpp
vendored
Normal file
@ -0,0 +1,70 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cldnn/runtime/tensor.hpp"

#include <cstdint>
#include <map>
#include <ostream>
#include <string>
#include <type_traits>
#include <utility>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
/// @brief Primitives implementation type.
/// @details Each named kind occupies its own bit so that kinds can be combined and tested
///          with the bitwise operators below; `any` has every bit of the underlying byte set.
enum class impl_types : uint8_t {
    cpu = 1 << 0,
    common = 1 << 1,
    ocl = 1 << 2,
    any = 0xFF,
};

/// @brief Bitwise AND of two implementation type masks.
/// @return Mask holding only the bits present in both @p a and @p b.
inline constexpr impl_types operator&(impl_types a, impl_types b) noexcept {
    using underlying = std::underlying_type<impl_types>::type;
    return static_cast<impl_types>(static_cast<underlying>(static_cast<underlying>(a) & static_cast<underlying>(b)));
}

/// @brief Bitwise OR of two implementation type masks.
/// @return Mask holding the bits present in either @p a or @p b.
inline constexpr impl_types operator|(impl_types a, impl_types b) noexcept {
    using underlying = std::underlying_type<impl_types>::type;
    return static_cast<impl_types>(static_cast<underlying>(static_cast<underlying>(a) | static_cast<underlying>(b)));
}

/// @brief Bitwise complement of an implementation type mask.
/// @return Mask with every bit of the underlying byte flipped.
inline constexpr impl_types operator~(impl_types a) noexcept {
    using underlying = std::underlying_type<impl_types>::type;
    // The operand is promoted to int by `~`; narrow back to the underlying type before
    // converting to the enum so that only the low byte is kept.
    return static_cast<impl_types>(static_cast<underlying>(~static_cast<underlying>(a)));
}

/// @brief Writes the name of an implementation type to a stream.
/// @details Only the four named enumerators have a textual form; any other value
///          (for example a combination of flags) is written as "unknown".
/// @return The stream @p out, to allow chaining.
inline std::ostream& operator<<(std::ostream& out, const impl_types& impl_type) {
    switch (impl_type) {
        case impl_types::cpu: out << "cpu"; break;
        case impl_types::common: out << "common"; break;
        case impl_types::ocl: out << "ocl"; break;
        case impl_types::any: out << "any"; break;
        default: out << "unknown"; break;
    }

    return out;
}
|
||||
|
||||
/// @brief Description of primitives implementation.
|
||||
struct implementation_desc {
|
||||
format::type output_format; ///< Output format.
|
||||
std::string kernel_name; ///< GPU kernel name.
|
||||
impl_types impl_type; ///< GPU implementation type.
|
||||
|
||||
implementation_desc() :
|
||||
output_format(format::any),
|
||||
kernel_name(""),
|
||||
impl_type(impl_types::any) {}
|
||||
|
||||
implementation_desc(format::type output_format,
|
||||
std::string kernel_name,
|
||||
impl_types impl_type = impl_types::any) :
|
||||
output_format(output_format),
|
||||
kernel_name(kernel_name),
|
||||
impl_type(impl_type) {}
|
||||
};
|
||||
|
||||
using implementation_forcing_map = std::map<primitive_id, implementation_desc>;
|
||||
|
||||
} // namespace cldnn
|
@ -38,12 +38,15 @@ file(GLOB __CLDNN_Headers__include
|
||||
"${__CLDNN_Directory__include}/*.hpp"
|
||||
)
|
||||
|
||||
set(__CLDNN_Directory__gpu "${CMAKE_CURRENT_SOURCE_DIR}/gpu")
|
||||
set(__CLDNN_Label__gpu "gpu")
|
||||
set(__CLDNN_Directory__impls "${CMAKE_CURRENT_SOURCE_DIR}/impls")
|
||||
set(__CLDNN_Label__gpu "impls")
|
||||
file(GLOB __CLDNN_Sources__gpu
|
||||
"${__CLDNN_Directory__gpu}/*.h"
|
||||
"${__CLDNN_Directory__gpu}/*.hpp"
|
||||
"${__CLDNN_Directory__gpu}/*.cpp"
|
||||
"${__CLDNN_Directory__impls}/common/*.hpp"
|
||||
"${__CLDNN_Directory__impls}/common/*.cpp"
|
||||
"${__CLDNN_Directory__impls}/cpu/*.hpp"
|
||||
"${__CLDNN_Directory__impls}/cpu/*.cpp"
|
||||
"${__CLDNN_Directory__impls}/ocl/*.hpp"
|
||||
"${__CLDNN_Directory__impls}/ocl/*.cpp"
|
||||
)
|
||||
|
||||
set(__CLDNN_Directory__cg_cache "${CLDNN__CODEGEN_INCDIR}")
|
||||
@ -130,5 +133,5 @@ endif()
|
||||
# ======================================================================================================
|
||||
|
||||
ie_sse42_optimization_flags(sse4_2_flags)
|
||||
set_source_files_properties(gpu/detection_output_cpu.cpp half.cpp
|
||||
set_source_files_properties(impls/cpu/detection_output.cpp half.cpp
|
||||
PROPERTIES COMPILE_FLAGS "${sse4_2_flags}")
|
||||
|
@ -1,126 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "activation_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "activation/activation_kernel_selector.h"
|
||||
#include "activation/activation_kernel_base.h"
|
||||
#include "register_gpu.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// GPU implementation of the activation primitive.
struct activation_gpu : typed_primitive_gpu_impl<activation> {
    using parent = typed_primitive_gpu_impl<activation>;
    using parent::parent;

    /// Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<activation_gpu>(*this);
    }

    /// Builds the parent's kernel arguments and, when the node is parameterized,
    /// additionally sets the slope memory of the instance.
    kernel_arguments_data get_arguments(typed_primitive_inst<activation>& instance, int32_t split) const override {
        kernel_arguments_data kernel_args = parent::get_arguments(instance, split);
        if (_outer.is_parameterized())
            kernel_args.slope = instance.slope_memory();
        return kernel_args;
    }

    /// Fills the kernel selector parameters for the node, picks the best kernel
    /// and returns a newly allocated implementation built from it.
    static primitive_impl* create(const activation_node& arg) {
        auto params = get_default_params<kernel_selector::activation_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());

        convert_new_activation_func(arg.get_primitive(), params.activations);

        if (arg.is_parameterized()) {
            const auto& slope = arg.slope_input().get_output_layout();
            const auto& output = arg.get_output_layout();

            const auto extra_params_count =
                kernel_selector::GetActivationAdditionalParamsNumber(params.activations[0].function);
            // The slope buffer must hold at least this many elements.
            const auto required_count = static_cast<size_t>(output.size.feature[0] * extra_params_count);

            CLDNN_ERROR_LESS_THAN(arg.id(),
                                  "Slope layout size count",
                                  slope.size.count(),
                                  "output_layout.size.feature[0] * params_num",
                                  required_count,
                                  "Error - not enough data inside additional params buffer");

            params.inputActivationParams.push_back(convert_data_tensor(slope));
        }

        auto& selector = kernel_selector::activation_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new activation_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers activation_gpu::create for every supported (data type, format) pair of the OCL engine.
attach_activation_gpu::attach_activation_gpu() {
    const auto factory = activation_gpu::create;
    // Builds the implementation-map key for the OCL engine.
    const auto key = [](data_types dt, format::type fmt) {
        return std::make_tuple(engine_types::ocl, dt, fmt);
    };

    implementation_map<activation>::add({
        {key(data_types::f32, format::yxfb), factory},
        {key(data_types::f16, format::yxfb), factory},
        {key(data_types::f32, format::bfyx), factory},
        {key(data_types::f16, format::bfyx), factory},
        {key(data_types::f32, format::byxf), factory},
        {key(data_types::f16, format::byxf), factory},
        {key(data_types::i8, format::yxfb), factory},
        {key(data_types::i8, format::bfyx), factory},
        {key(data_types::i8, format::byxf), factory},
        {key(data_types::u8, format::yxfb), factory},
        {key(data_types::u8, format::bfyx), factory},
        {key(data_types::u8, format::byxf), factory},
        {key(data_types::i32, format::bfyx), factory},
        {key(data_types::i32, format::byxf), factory},
        {key(data_types::i32, format::yxfb), factory},
        // block f16 format
        {key(data_types::f16, format::b_fs_yx_fsv16), factory},
        {key(data_types::f32, format::b_fs_yx_fsv16), factory},
        {key(data_types::i8, format::b_fs_yx_fsv16), factory},
        {key(data_types::u8, format::b_fs_yx_fsv16), factory},
        // 3D
        {key(data_types::f32, format::bfzyx), factory},
        {key(data_types::f16, format::bfzyx), factory},
        {key(data_types::i8, format::bfzyx), factory},
        {key(data_types::i32, format::bfzyx), factory},
        {key(data_types::f32, format::b_fs_zyx_fsv16), factory},
        {key(data_types::f16, format::b_fs_zyx_fsv16), factory},
        {key(data_types::i8, format::b_fs_zyx_fsv16), factory},
        {key(data_types::u8, format::b_fs_zyx_fsv16), factory},
        {key(data_types::f32, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::f16, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::i8, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::f32, format::bs_fs_yx_bsv16_fsv16), factory},
        {key(data_types::f16, format::bs_fs_yx_bsv16_fsv16), factory},
        {key(data_types::i8, format::bs_fs_yx_bsv16_fsv16), factory},
        // bfwzyx
        {key(data_types::f32, format::bfwzyx), factory},
        {key(data_types::f16, format::bfwzyx), factory},
        {key(data_types::i32, format::bfwzyx), factory},
        {key(data_types::i8, format::bfwzyx), factory},
        {key(data_types::u8, format::bfwzyx), factory},
        // fs_b_yx_fsv32
        {key(data_types::f16, format::fs_b_yx_fsv32), factory},
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,86 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "average_unpooling_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "average_unpooling/average_unpooling_kernel_selector.h"
|
||||
#include "average_unpooling/average_unpooling_kernel_base.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// GPU implementation of the average_unpooling primitive.
struct average_unpooling_gpu : typed_primitive_gpu_impl<average_unpooling> {
    using parent = typed_primitive_gpu_impl<average_unpooling>;
    using parent::parent;

    /// Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<average_unpooling_gpu>(*this);
    }

protected:
    /// Forwards to the parent implementation without adding any arguments.
    kernel_arguments_data get_arguments(typed_primitive_inst<average_unpooling>& instance, int32_t split) const override {
        return parent::get_arguments(instance, split);
    }

public:
    /// Fills the kernel selector parameters (unpool size and stride) from the primitive,
    /// picks the best kernel and returns a newly allocated implementation built from it.
    static primitive_impl* create(const average_unpooling_node& arg) {
        auto unpool_params = get_default_params<kernel_selector::average_unpooling_params>(arg);
        auto unpool_optional_params =
            get_default_optional_params<kernel_selector::average_unpooling_optional_params>(arg.get_program());

        auto prim = arg.get_primitive();
        auto stride = prim->stride;

        unpool_params.unpoolSize = {
            static_cast<uint32_t>(prim->size.spatial[0]),
            static_cast<uint32_t>(prim->size.spatial[1]),
        };
        unpool_params.unpoolStride = {
            static_cast<uint32_t>(stride.spatial[0]),
            static_cast<uint32_t>(stride.spatial[1]),
        };

        auto& selector = kernel_selector::average_unpooling_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(unpool_params, unpool_optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new average_unpooling_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers average_unpooling_gpu::create for every supported (data type, format) pair of the OCL engine.
attach_average_unpooling_gpu::attach_average_unpooling_gpu() {
    // Registers the factory under the OCL engine for one (data type, format) pair.
    const auto enroll = [](data_types dt, format::type fmt) {
        implementation_map<average_unpooling>::add(std::make_tuple(engine_types::ocl, dt, fmt),
                                                   average_unpooling_gpu::create);
    };

    enroll(data_types::f32, format::yxfb);
    enroll(data_types::f16, format::yxfb);
    enroll(data_types::f32, format::bfyx);
    enroll(data_types::f16, format::bfyx);
    enroll(data_types::i8, format::bfyx);
    enroll(data_types::i8, format::yxfb);
    enroll(data_types::f32, format::byxf);
    enroll(data_types::f16, format::byxf);
    enroll(data_types::i8, format::byxf);
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,77 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "batch_to_space_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "batch_to_space/batch_to_space_kernel_selector.h"
|
||||
#include "batch_to_space/batch_to_space_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "data_inst.h"
|
||||
#include <vector>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
/// GPU implementation of the batch_to_space primitive.
struct batch_to_space_gpu : typed_primitive_gpu_impl<batch_to_space> {
    using parent = typed_primitive_gpu_impl<batch_to_space>;
    using parent::parent;

    /// Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<batch_to_space_gpu>(*this);
    }

public:
    /// Fills the kernel selector parameters (block shape and crops) from the primitive,
    /// picks the best kernel and returns a newly allocated implementation built from it.
    static primitive_impl* create(const batch_to_space_node& arg) {
        auto params = get_default_params<kernel_selector::batch_to_space_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::batch_to_space_optional_params>(arg.get_program());

        auto prim = arg.get_primitive();
        params.block_shape = convert_dim_vector(prim->block_shape);
        params.crops_begin = convert_dim_vector(prim->crops_begin);
        params.crops_end = convert_dim_vector(prim->crops_end);

        auto& selector = kernel_selector::batch_to_space_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new batch_to_space_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers batch_to_space_gpu::create for every supported (data type, format) pair of the OCL engine.
attach_batch_to_space_gpu::attach_batch_to_space_gpu() {
    const auto factory = batch_to_space_gpu::create;

    // Every listed format is registered with every listed data type.
    const format::type layouts[] = {format::bfyx, format::bfzyx, format::bfwzyx, format::b_fs_yx_fsv16};
    const data_types precisions[] = {data_types::f32, data_types::f16, data_types::u8, data_types::i8};

    for (const auto fmt : layouts) {
        for (const auto dt : precisions) {
            implementation_map<batch_to_space>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,100 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "border_inst.h"
|
||||
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "border/border_kernel_selector.h"
|
||||
#include "border/border_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// GPU implementation of the border primitive.
struct border_gpu : typed_primitive_gpu_impl<border> {
    using parent = typed_primitive_gpu_impl<border>;
    using parent::parent;

    /// Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<border_gpu>(*this);
    }

    /// Fills the kernel selector parameters (border sizes, value and type) from the primitive,
    /// picks the best kernel and returns a newly allocated implementation built from it.
    static primitive_impl* create(const border_node& arg) {
        auto params = get_default_params<kernel_selector::border_params>(arg, 1);
        auto optional_params =
            get_default_optional_params<kernel_selector::border_optional_params>(arg.get_program());

        auto prim = arg.get_primitive();

        params.lt_sizes = convert_dim_vector(prim->left_top_sizes);
        params.rb_sizes = convert_dim_vector(prim->right_bottom_sizes);
        params.border_value = prim->border_value;

        // Translate the primitive's border type into the kernel selector enumeration.
        switch (prim->type) {
            case border_type::constant: {
                params.b_type = kernel_selector::border_type::CONSTANT;
                break;
            }
            case border_type::edge: {
                params.b_type = kernel_selector::border_type::EDGE;
                break;
            }
            case border_type::mirror: {
                params.b_type = kernel_selector::border_type::MIRROR;
                break;
            }
            case border_type::mirror_101: {
                params.b_type = kernel_selector::border_type::MIRROR_101;
                break;
            }
            default:
                assert(
                    false &&
                    "Encountered unhandled enum case: border_type during translation to kernel selector enumeration.");
        }

        auto& selector = kernel_selector::border_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new border_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers border_gpu::create for every supported (data type, format) pair of the OCL engine.
attach_border_gpu::attach_border_gpu() {
    const auto factory = border_gpu::create;

    // Every listed format is registered with every listed data type.
    const format::type layouts[] = {format::yxfb, format::bfyx, format::byxf, format::bfzyx, format::bfwzyx};
    const data_types precisions[] = {data_types::f32, data_types::f16, data_types::i8, data_types::u8};

    for (const auto fmt : layouts) {
        for (const auto dt : precisions) {
            implementation_map<border>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,163 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "concatenation_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "concatenation/concatenation_kernel_selector.h"
|
||||
#include "concatenation/concatenation_kernel_base.h"
|
||||
|
||||
#include <initializer_list>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
namespace {
|
||||
/// Maps a concatenation axis of the primitive onto the kernel selector axis enumeration.
/// Values without a dedicated case map to the X axis.
kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) {
    using ks_axis = kernel_selector::concat_axis;

    switch (axis) {
        case concatenation::along_x: return ks_axis::X;
        case concatenation::along_y: return ks_axis::Y;
        case concatenation::along_z: return ks_axis::Z;
        case concatenation::along_w: return ks_axis::W;
        case concatenation::along_f: return ks_axis::FEATURE;
        case concatenation::along_b: return ks_axis::BATCH;
        default: return ks_axis::X;
    }
}
|
||||
} // namespace
|
||||
|
||||
/// GPU implementation of the concatenation primitive.
struct concatenation_gpu : typed_primitive_gpu_impl<concatenation> {
    using parent = typed_primitive_gpu_impl<concatenation>;

    /// Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<concatenation_gpu>(*this);
    }

    /// Unless the node can be optimized, checks that the number of kernels in @p kd
    /// equals the number of inputs of the node.
    concatenation_gpu(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) {
        if (_outer.can_be_optimized())
            return;

        CLDNN_ERROR_NOT_EQUAL(_outer.id(),
                              "Input count",
                              _outer.inputs_count(),
                              "kds size",
                              kd.kernels.size(),
                              "Error - not enough kernels for concatenation");
    }

protected:
    /// Reports the instance as optimized out when the parent does or when the node can be optimized.
    bool optimized_out(concatenation_inst& instance) const override {
        return parent::optimized_out(instance) || _outer.can_be_optimized();
    }

public:
    /// Returns a newly allocated implementation. For a node that can be optimized no kernel
    /// is selected; otherwise the kernel selector parameters are filled with one converted
    /// tensor per input and the best kernel is used.
    static primitive_impl* create(const concatenation_node& arg) {
        if (arg.can_be_optimized()) {
            return new concatenation_gpu(arg, {});
        }

        auto params = get_default_params<kernel_selector::concatenation_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::concatenation_optional_params>(arg.get_program());
        auto axis = arg.get_primitive()->axis;

        params.inputs.resize(arg.inputs_count());
        for (size_t idx = 0; idx < arg.inputs_count(); ++idx) {
            params.inputs[idx] = convert_data_tensor(arg.input(idx).get_output_layout());
        }

        params.axis = convert_axis(axis);
        optional_params.kernelPerInput = true;

        auto& selector = kernel_selector::concatenation_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new concatenation_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers concatenation_gpu::create for every supported (data type, format) pair of the OCL engine.
attach_concatenation_gpu::attach_concatenation_gpu() {
    const auto factory = concatenation_gpu::create;
    // Builds the implementation-map key for the OCL engine.
    const auto key = [](data_types dt, format::type fmt) {
        return std::make_tuple(engine_types::ocl, dt, fmt);
    };

    implementation_map<concatenation>::add({
        {key(data_types::f32, format::yxfb), factory},
        {key(data_types::f16, format::yxfb), factory},
        {key(data_types::i8, format::yxfb), factory},
        {key(data_types::u8, format::yxfb), factory},
        {key(data_types::i32, format::yxfb), factory},
        {key(data_types::i64, format::yxfb), factory},
        {key(data_types::f32, format::bfyx), factory},
        {key(data_types::f16, format::bfyx), factory},
        {key(data_types::i8, format::bfyx), factory},
        {key(data_types::u8, format::bfyx), factory},
        {key(data_types::i32, format::bfyx), factory},
        {key(data_types::i64, format::bfyx), factory},
        {key(data_types::f32, format::byxf), factory},
        {key(data_types::f16, format::byxf), factory},
        {key(data_types::i8, format::byxf), factory},
        {key(data_types::u8, format::byxf), factory},
        {key(data_types::i32, format::byxf), factory},
        {key(data_types::i64, format::byxf), factory},
        {key(data_types::f32, format::fyxb), factory},
        {key(data_types::f16, format::fyxb), factory},
        // 5D
        {key(data_types::f32, format::bfzyx), factory},
        {key(data_types::f16, format::bfzyx), factory},
        {key(data_types::i8, format::bfzyx), factory},
        {key(data_types::u8, format::bfzyx), factory},
        {key(data_types::i32, format::bfzyx), factory},
        {key(data_types::i64, format::bfzyx), factory},
        {key(data_types::f32, format::b_fs_zyx_fsv16), factory},
        {key(data_types::f16, format::b_fs_zyx_fsv16), factory},
        {key(data_types::i8, format::b_fs_zyx_fsv16), factory},
        {key(data_types::u8, format::b_fs_zyx_fsv16), factory},
        {key(data_types::i32, format::b_fs_zyx_fsv16), factory},
        {key(data_types::i64, format::b_fs_zyx_fsv16), factory},
        {key(data_types::f32, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::f16, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::i8, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::u8, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::i32, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::i64, format::bs_fs_zyx_bsv16_fsv16), factory},
        {key(data_types::f32, format::bs_fs_yx_bsv16_fsv16), factory},
        {key(data_types::f16, format::bs_fs_yx_bsv16_fsv16), factory},
        // block f16 format
        {key(data_types::f16, format::b_fs_yx_fsv16), factory},
        {key(data_types::f32, format::b_fs_yx_fsv16), factory},
        {key(data_types::u8, format::b_fs_yx_fsv16), factory},
        {key(data_types::i8, format::b_fs_yx_fsv16), factory},
        // MMAD
        {key(data_types::i8, format::b_fs_yx_fsv4), factory},
        {key(data_types::u8, format::b_fs_yx_fsv4), factory},
        {key(data_types::i8, format::b_fs_yx_fsv32), factory},
        {key(data_types::u8, format::b_fs_yx_fsv32), factory},
        // 6D
        {key(data_types::f32, format::bfwzyx), factory},
        {key(data_types::f16, format::bfwzyx), factory},
        {key(data_types::u8, format::bfwzyx), factory},
        {key(data_types::i8, format::bfwzyx), factory},
        {key(data_types::i32, format::bfwzyx), factory},
        {key(data_types::i64, format::bfwzyx), factory},
        {key(data_types::f16, format::fs_b_yx_fsv32), factory},
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,127 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "crop_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "eltwise/eltwise_kernel_selector.h"
|
||||
#include "eltwise/eltwise_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief GPU implementation of the crop primitive.
///
/// The kernel is picked from the eltwise kernel selector: crop is described as a single
/// ASSIGN operation on input buffer 0, whose tensor is built from the input layout and
/// the primitive's offsets.
struct crop_gpu : typed_primitive_gpu_impl<crop> {
    using parent = typed_primitive_gpu_impl<crop>;
    using parent::parent;

    /// @brief Creates a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<crop_gpu>(*this); }

protected:
    /// @brief True when the base class reports the instance as optimized out,
    ///        or when the owning node says it can be optimized.
    bool optimized_out(crop_inst& instance) const override {
        if (parent::optimized_out(instance))
            return true;
        return _outer.can_be_optimized();
    }

public:
    /// @brief Factory registered in implementation_map<crop>.
    /// @param arg Crop program node to build the implementation for.
    /// @return Newly allocated crop_gpu wrapping the first kernel returned by the selector.
    static primitive_impl* create(const crop_node& arg) {
        auto params = get_default_params<kernel_selector::eltwise_params>(arg, 1);
        auto optional_params =
            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());

        // Crop is expressed as "assign input 0 to output".
        params.operations.push_back(
            {{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN});

        // Rebuild the input tensor so that it carries the crop offsets.
        const auto& in_layout = arg.input().get_output_layout();
        params.inputs[0] = convert_data_tensor(in_layout, 1, arg.get_primitive()->offsets);

        auto& selector = kernel_selector::eltwise_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");

        return new crop_gpu(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers crop_gpu::create in implementation_map<crop> for every supported
///        (data type, format) combination on the OCL engine.
///
/// The registrations are issued in the same order as a fully unrolled list would be:
/// format by format, and within a format in the order of the type arrays below.
attach_crop_gpu::attach_crop_gpu() {
    auto val_fw = crop_gpu::create;

    const auto register_impl = [&val_fw](data_types type, format::type fmt) {
        implementation_map<crop>::add(std::make_tuple(engine_types::ocl, type, fmt), val_fw);
    };

    // Type sets, in registration order.
    const data_types full_types[] = {data_types::f32, data_types::f16, data_types::i64,
                                     data_types::i32, data_types::i8,  data_types::u8};
    const data_types fsv16_2d_types[] = {data_types::i8, data_types::u8, data_types::f32, data_types::f16};
    const data_types float_types[] = {data_types::f32, data_types::f16};

    // Plain formats: all six data types each.
    const format::type plain_formats[] = {format::yxfb,  format::bfyx,  format::byxf,
                                          format::fyxb,  format::bfzyx, format::bfwzyx};
    for (const auto fmt : plain_formats) {
        for (const auto type : full_types)
            register_impl(type, fmt);
    }

    // Blocked formats.
    for (const auto type : fsv16_2d_types)
        register_impl(type, format::b_fs_yx_fsv16);

    for (const auto type : full_types)
        register_impl(type, format::b_fs_zyx_fsv16);

    for (const auto type : float_types)
        register_impl(type, format::bs_fs_yx_bsv16_fsv16);

    for (const auto type : full_types)
        register_impl(type, format::bs_fs_zyx_bsv16_fsv16);
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,173 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "deconvolution_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "deconvolution/deconvolution_kernel_selector.h"
|
||||
#include "deconvolution/deconvolution_kernel_base.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief GPU implementation of the deconvolution primitive.
///
/// Besides the default kernel arguments it passes the weights and (when present) bias
/// memory for the requested split, and forwards split/groups from the owning node.
struct deconvolution_gpu : typed_primitive_gpu_impl<deconvolution> {
    using parent = typed_primitive_gpu_impl<deconvolution>;
    using parent::parent;

    /// @brief Creates a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<deconvolution_gpu>(*this); }

protected:
    // TODO: share it with convolution and fully connected
    /// @brief Checks that the output padding filling value equals 0.0f; the check itself is
    ///        done by CLDNN_ERROR_NOT_EQUAL, after which the function reports success.
    bool validate_impl(const typed_primitive_inst<deconvolution>&) const override {
        CLDNN_ERROR_NOT_EQUAL(_outer.id(),
                              "deconvolution filling value",
                              _outer.get_output_layout().data_padding.filling_value(),
                              "padding mode",
                              0.0f,
                              "Unknown padding mode in deconvolution.");
        return true;
    }

    /// @brief Extends the base arguments with weights and optional bias memory for @p split.
    kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override {
        kernel_arguments_data args = parent::get_arguments(instance, split);

        args.weights = instance.weights_memory(split);
        if (instance.bias_term())
            args.bias = instance.bias_memory(split);
        else
            args.bias = nullptr;

        return args;
    }

    int32_t get_split() const override { return _outer.get_split(); }

    uint32_t get_groups() const override { return _outer.get_groups(); }

public:
    /// @brief Factory registered in implementation_map<deconvolution>.
    /// @param arg Deconvolution program node to build the implementation for.
    /// @return Newly allocated deconvolution_gpu wrapping the first kernel returned by the selector.
    static primitive_impl* create(const deconvolution_node& arg) {
        const auto& prim = arg.get_primitive();
        const auto& w_layout = arg.weights(0).get_output_layout();
        const auto& w_size = w_layout.size;

        const auto split = prim->split();
        const auto& stride = prim->stride;
#if 0  // TODO: support dilation
        const auto& dilation = prim->dilation;
#else
        const tensor dilation = {0, 0, 1, 1, 1};
#endif
        const auto& input_offset = prim->input_offset;
        const auto& groups = prim->groups;

        // With more than one group the default params are requested for a single split.
        auto params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(
            arg,
            (groups > 1) ? 1 : split,
            1,
            prim->grouped_weights_shape);
        auto optional_params =
            get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());

        params.split = split;
        params.groups = groups;

        // Filter size: the z extent is forced to 1 when the output format has two spatial dimensions.
        const auto spatial_dims = arg.get_output_layout().format.dimension() - 2;
        uint32_t filter_x = w_size.spatial[0];
        uint32_t filter_y = w_size.spatial[1];
        uint32_t filter_z = 1;
        if (spatial_dims != 2)
            filter_z = w_size.spatial[2];
        params.filterSize = { filter_x, filter_y, filter_z };

        // Padding is the negated input offset, clamped at zero.
        params.padding = {static_cast<uint32_t>(std::max(-input_offset.spatial[0], 0)),
                          static_cast<uint32_t>(std::max(-input_offset.spatial[1], 0)),
                          static_cast<uint32_t>(std::max(-input_offset.spatial[2], 0))};

        params.stride = {static_cast<uint32_t>(stride.spatial[0]),
                         static_cast<uint32_t>(stride.spatial[1]),
                         static_cast<uint32_t>(stride.spatial[2])};

        params.dilation = {static_cast<uint32_t>(dilation.spatial[0]),
                           static_cast<uint32_t>(dilation.spatial[1]),
                           static_cast<uint32_t>(dilation.spatial[2])};

        auto& selector = kernel_selector::deconvolution_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");

        return new deconvolution_gpu(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers deconvolution_gpu::create in implementation_map<deconvolution> for every
///        supported (data type, format) combination on the OCL engine.
///
/// The table below lists the combinations in registration order; the loop issues one
/// implementation_map::add call per row.
attach_deconvolution_gpu::attach_deconvolution_gpu() {
    struct supported_key {
        data_types type;
        format::type fmt;
    };

    const supported_key supported[] = {
        // f32
        {data_types::f32, format::yxfb},
        {data_types::f32, format::bfyx},
        {data_types::f32, format::bfzyx},
        {data_types::f32, format::b_fs_zyx_fsv16},
        {data_types::f32, format::bs_fs_zyx_bsv16_fsv16},
        {data_types::f32, format::b_fs_yx_fsv16},
        {data_types::f32, format::bs_fs_yx_bsv16_fsv16},
        // f16
        {data_types::f16, format::yxfb},
        {data_types::f16, format::bfyx},
        {data_types::f16, format::bfzyx},
        {data_types::f16, format::b_fs_zyx_fsv16},
        {data_types::f16, format::bs_fs_zyx_bsv16_fsv16},
        {data_types::f16, format::b_fs_yx_fsv16},
        // byxf
        {data_types::f32, format::byxf},
        {data_types::f16, format::byxf},
        // i8 / u8
        {data_types::i8, format::bfyx},
        {data_types::u8, format::bfyx},
        {data_types::i8, format::bfzyx},
        {data_types::u8, format::bfzyx},
        {data_types::i8, format::b_fs_yx_fsv16},
        {data_types::u8, format::b_fs_yx_fsv16},
        {data_types::i8, format::b_fs_zyx_fsv16},
        {data_types::u8, format::b_fs_zyx_fsv16},
        {data_types::i8, format::bs_fs_yx_bsv16_fsv16},
        {data_types::u8, format::bs_fs_yx_bsv16_fsv16},
        {data_types::i8, format::bs_fs_zyx_bsv16_fsv16},
        {data_types::u8, format::bs_fs_zyx_bsv16_fsv16},
    };

    for (const auto& key : supported) {
        implementation_map<deconvolution>::add(std::make_tuple(engine_types::ocl, key.type, key.fmt),
                                               deconvolution_gpu::create);
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,70 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "depth_to_space_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "depth_to_space/depth_to_space_kernel_selector.h"
|
||||
#include "depth_to_space/depth_to_space_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "common_types.h"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
/// @brief GPU implementation of the depth_to_space primitive.
struct depth_to_space_gpu : typed_primitive_gpu_impl<depth_to_space> {
    using parent = typed_primitive_gpu_impl<depth_to_space>;
    using parent::parent;

    /// @brief Creates a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<depth_to_space_gpu>(*this); }

public:
    /// @brief Factory registered in implementation_map<depth_to_space>.
    /// @param arg depth_to_space program node to build the implementation for.
    /// @return Newly allocated depth_to_space_gpu wrapping the first kernel returned by the selector.
    static primitive_impl* create(const depth_to_space_node& arg) {
        auto params = get_default_params<kernel_selector::depth_to_space_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());

        params.block_size = arg.get_primitive()->block_size;

        // Translate the primitive mode: anything other than blocks_first maps to DEPTH_FIRST.
        if (arg.get_primitive()->mode == depth_to_space_mode::blocks_first)
            params.mode = kernel_selector::depth_to_space_mode::BLOCKS_FIRST;
        else
            params.mode = kernel_selector::depth_to_space_mode::DEPTH_FIRST;

        auto& selector = kernel_selector::depth_to_space_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");

        return new depth_to_space_gpu(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers depth_to_space_gpu::create in implementation_map<depth_to_space> for
///        f32/f16/u8/i8 in each of the bfyx, bfzyx and b_fs_yx_fsv16 formats (OCL engine).
attach_depth_to_space_gpu::attach_depth_to_space_gpu() {
    auto val_fw = depth_to_space_gpu::create;

    // Registration order: format by format, types in the order listed here.
    const format::type formats[] = {format::bfyx, format::bfzyx, format::b_fs_yx_fsv16};
    const data_types types[] = {data_types::f32, data_types::f16, data_types::u8, data_types::i8};

    for (const auto fmt : formats) {
        for (const auto type : types)
            implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, type, fmt), val_fw);
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,195 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "eltwise_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "eltwise/eltwise_kernel_selector.h"
|
||||
#include "eltwise/eltwise_kernel_base.h"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief GPU implementation of the eltwise primitive.
struct eltwise_gpu : typed_primitive_gpu_impl<eltwise> {
    using parent = typed_primitive_gpu_impl<eltwise>;
    using parent::parent;

    /// @brief Creates a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<eltwise_gpu>(*this); }

protected:
    /// @brief Uses the base-class kernel arguments unchanged.
    kernel_arguments_data get_arguments(typed_primitive_inst<eltwise>& instance, int32_t split) const override {
        return parent::get_arguments(instance, split);
    }

public:
    /// @brief Factory registered in implementation_map<eltwise>.
    ///
    /// Builds the eltwise kernel parameters: one tensor per input, a chain of operations in
    /// the primitive's mode, the broadcast/layoutBased flags, strides and the int8
    /// quantization flag. It then takes the first kernel returned by the selector.
    /// @param arg Eltwise program node to build the implementation for.
    /// @return Newly allocated eltwise_gpu.
    static primitive_impl* create(const eltwise_node& arg) {
        using input_type = kernel_selector::eltwise_params::InputType;

        auto params = get_default_params<kernel_selector::eltwise_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());

        // The default params already hold input 0; append the remaining inputs.
        for (size_t idx = 1; idx < arg.inputs_count(); ++idx)
            params.inputs.push_back(convert_data_tensor(arg.input(idx).get_output_layout()));

        const auto& prim = arg.get_primitive();

        // First operation combines buffers 0 and 1 ...
        params.operations.push_back({{input_type::Buffer(0), input_type::Buffer(1)},
                                     convert_to_eltwise_mode(prim->mode)});

        // ... every further input is combined with the previous intermediate result.
        for (uint32_t idx = 2; idx < static_cast<uint32_t>(arg.inputs_count()); ++idx) {
            params.operations.push_back({{input_type::Intermediate(idx - 2), input_type::Buffer(idx)},
                                         convert_to_eltwise_mode(prim->mode)});
        }

        if (prim->mode == eltwise_mode::sum)
            params.coefficients = prim->coefficients;

        // Only the first input whose dims differ from the output decides between
        // broadcast and layout-based processing.
        for (size_t idx = 0; idx < params.inputs.size(); ++idx) {
            if (params.inputs[idx].SameDims(params.output))
                continue;

            const std::vector<int32_t> in_dims = arg.input(idx).get_output_layout().size.raw.vector();
            const std::vector<int32_t> out_dims = arg.get_output_layout().size.raw.vector();

            bool needs_broadcast = false;
            for (size_t d = 0; d < out_dims.size(); ++d) {
                if (out_dims[d] != 1 && in_dims[d] == 1)
                    needs_broadcast = true;
            }

            if (needs_broadcast)
                params.broadcast = true;
            else
                params.layoutBased = true;
            break;
        }

        // stride
        if (!prim->stride.empty()) {
            const auto& prim_strides = prim->stride;
            params.stride.resize(prim_strides.size());
            for (size_t idx = 0; idx < prim->stride.size(); ++idx) {
                params.stride[idx] = {static_cast<uint32_t>(prim_strides[idx].spatial[0]),
                                      static_cast<uint32_t>(prim_strides[idx].spatial[1]),
                                      static_cast<uint32_t>(prim_strides[idx].spatial[2])};
            }
        }

        // check if strides are the same (only x and y are compared)
        if (!params.stride.empty()) {
            const auto& first = params.stride[0];
            for (size_t idx = 1; idx < params.stride.size(); ++idx) {
                const auto& current = params.stride[idx];
                if (!(first.x == current.x && first.y == current.y))
                    params.layoutBased = true;
            }
        } else if (!params.inputs[0].SameDimsSizes(params.inputs[1])) {
            params.broadcast = true;
        }

        // TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough
        // The flag stays set only when every input is u8 or i8.
        bool all_inputs_int8 = true;
        for (size_t idx = 0; idx < arg.inputs_count(); ++idx) {
            const auto in_type = arg.input(idx).get_output_layout().data_type;
            if (!(in_type == data_types::u8 || in_type == data_types::i8))
                all_inputs_int8 = false;
        }
        params.int8_quantization = all_inputs_int8;

        auto& selector = kernel_selector::eltwise_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");

        return new eltwise_gpu(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers eltwise_gpu::create in implementation_map<eltwise> for every supported
///        (data type, format) combination on the OCL engine, in a single add() call.
attach_eltwise_gpu::attach_eltwise_gpu() {
    const auto ocl = engine_types::ocl;
    const auto factory = eltwise_gpu::create;

    implementation_map<eltwise>::add({
        // yxfb (plus u8 bfyx, kept at its original position in the list)
        { std::make_tuple(ocl, data_types::f32, format::yxfb), factory },
        { std::make_tuple(ocl, data_types::u8, format::bfyx), factory },
        { std::make_tuple(ocl, data_types::f16, format::yxfb), factory },
        { std::make_tuple(ocl, data_types::i8, format::yxfb), factory },
        { std::make_tuple(ocl, data_types::i32, format::yxfb), factory },
        { std::make_tuple(ocl, data_types::i64, format::yxfb), factory },
        // bfyx
        { std::make_tuple(ocl, data_types::f32, format::bfyx), factory },
        { std::make_tuple(ocl, data_types::f16, format::bfyx), factory },
        { std::make_tuple(ocl, data_types::i8, format::bfyx), factory },
        { std::make_tuple(ocl, data_types::i32, format::bfyx), factory },
        { std::make_tuple(ocl, data_types::i64, format::bfyx), factory },
        // byxf
        { std::make_tuple(ocl, data_types::f32, format::byxf), factory },
        { std::make_tuple(ocl, data_types::f16, format::byxf), factory },
        { std::make_tuple(ocl, data_types::i8, format::byxf), factory },
        { std::make_tuple(ocl, data_types::i32, format::byxf), factory },
        { std::make_tuple(ocl, data_types::i64, format::byxf), factory },
        // b_fs_yx_fsv16
        { std::make_tuple(ocl, data_types::f16, format::b_fs_yx_fsv16), factory },
        { std::make_tuple(ocl, data_types::f32, format::b_fs_yx_fsv16), factory },
        { std::make_tuple(ocl, data_types::i8, format::b_fs_yx_fsv16), factory },
        { std::make_tuple(ocl, data_types::u8, format::b_fs_yx_fsv16), factory },
        // bfzyx
        { std::make_tuple(ocl, data_types::f32, format::bfzyx), factory },
        { std::make_tuple(ocl, data_types::f16, format::bfzyx), factory },
        { std::make_tuple(ocl, data_types::i8, format::bfzyx), factory },
        { std::make_tuple(ocl, data_types::u8, format::bfzyx), factory },
        { std::make_tuple(ocl, data_types::i32, format::bfzyx), factory },
        { std::make_tuple(ocl, data_types::i64, format::bfzyx), factory },
        // bfwzyx
        { std::make_tuple(ocl, data_types::f32, format::bfwzyx), factory },
        { std::make_tuple(ocl, data_types::f16, format::bfwzyx), factory },
        { std::make_tuple(ocl, data_types::i8, format::bfwzyx), factory },
        { std::make_tuple(ocl, data_types::u8, format::bfwzyx), factory },
        { std::make_tuple(ocl, data_types::i32, format::bfwzyx), factory },
        { std::make_tuple(ocl, data_types::i64, format::bfwzyx), factory },
        // b_fs_zyx_fsv16
        { std::make_tuple(ocl, data_types::f32, format::b_fs_zyx_fsv16), factory },
        { std::make_tuple(ocl, data_types::f16, format::b_fs_zyx_fsv16), factory },
        { std::make_tuple(ocl, data_types::i8, format::b_fs_zyx_fsv16), factory },
        { std::make_tuple(ocl, data_types::u8, format::b_fs_zyx_fsv16), factory },
        { std::make_tuple(ocl, data_types::i32, format::b_fs_zyx_fsv16), factory },
        { std::make_tuple(ocl, data_types::i64, format::b_fs_zyx_fsv16), factory },
        // bs_fs_zyx_bsv16_fsv16
        { std::make_tuple(ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), factory },
        { std::make_tuple(ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), factory },
        { std::make_tuple(ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), factory },
        { std::make_tuple(ocl, data_types::i32, format::bs_fs_zyx_bsv16_fsv16), factory },
        { std::make_tuple(ocl, data_types::i64, format::bs_fs_zyx_bsv16_fsv16), factory },
        // bs_fs_yx_bsv16_fsv16
        { std::make_tuple(ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), factory },
        { std::make_tuple(ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), factory },
        // MMAD: b_fs_yx_fsv4
        { std::make_tuple(ocl, data_types::i8, format::b_fs_yx_fsv4), factory },
        { std::make_tuple(ocl, data_types::u8, format::b_fs_yx_fsv4), factory },
        { std::make_tuple(ocl, data_types::f32, format::b_fs_yx_fsv4), factory },
        // MMAD: b_fs_yx_fsv32
        { std::make_tuple(ocl, data_types::i8, format::b_fs_yx_fsv32), factory },
        { std::make_tuple(ocl, data_types::u8, format::b_fs_yx_fsv32), factory },
        { std::make_tuple(ocl, data_types::f32, format::b_fs_yx_fsv32), factory },
        { std::make_tuple(ocl, data_types::f16, format::b_fs_yx_fsv32), factory },
        // MMAD: b_fs_zyx_fsv32
        { std::make_tuple(ocl, data_types::i8, format::b_fs_zyx_fsv32), factory },
        { std::make_tuple(ocl, data_types::u8, format::b_fs_zyx_fsv32), factory },
        { std::make_tuple(ocl, data_types::f32, format::b_fs_zyx_fsv32), factory },
        { std::make_tuple(ocl, data_types::f16, format::b_fs_zyx_fsv32), factory },
        // fs_b_yx_fsv32
        { std::make_tuple(ocl, data_types::f16, format::fs_b_yx_fsv32), factory },
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,68 +0,0 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "gather_nd_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "gather/gather_nd_kernel_selector.h"
|
||||
#include "gather/gather_nd_kernel_ref.h"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// gather_nd primitive implementation built on typed_primitive_gpu_impl.
struct gather_nd_gpu : typed_primitive_gpu_impl<gather_nd> {
    using parent = typed_primitive_gpu_impl<gather_nd>;
    using parent::parent;

    /// Returns a copy of this implementation wrapped in a unique_ptr.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<gather_nd_gpu>(*this); }

    /// Fills the kernel-selector parameters from @p arg, queries the gather_nd kernel selector
    /// and wraps the first returned kernel in a newly allocated gather_nd_gpu.
    /// The result is a raw pointer created with `new`.
    static primitive_impl* create(const gather_nd_node& arg) {
        auto params = get_default_params<kernel_selector::gather_nd_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::gather_nd_optional_params>(arg.get_program());

        const auto primitive = arg.get_primitive();
        params.indices_rank = primitive->indices_rank;
        params.batch_dims = primitive->batch_dims;

        // The output layout of input 1 is appended to the kernel inputs.
        params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));

        auto candidates =
            kernel_selector::gather_nd_kernel_selector::Instance().GetBestKernels(params, optional_params);

        // NOTE(review): CLDNN_ERROR_BOOL presumably aborts creation when the condition is true;
        // the indexing below relies on the list being non-empty - confirm in error_handler.hpp.
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new gather_nd_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers gather_nd_gpu::create in implementation_map<gather_nd> for the ocl engine:
/// f32, f16 and i32 data in the bfyx, bfzyx and bfwzyx formats.
attach_gather_nd_gpu::attach_gather_nd_gpu() {
    // Formats are the outer loop and data types the inner one, which yields the same
    // registration order as listing the nine combinations one by one.
    for (const auto fmt : {format::bfyx, format::bfzyx, format::bfwzyx}) {
        for (const auto dt : {data_types::f32, data_types::f16, data_types::i32}) {
            implementation_map<gather_nd>::add(std::make_tuple(engine_types::ocl, dt, fmt), gather_nd_gpu::create);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,266 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <initializer_list>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace mputils {
/// Compile-time list of types. Only declared: it is used as a tag and never instantiated.
template <typename... Types>
struct type_tuple;

/// Compile-time list of indices.
template <std::size_t... Indices>
struct index_tuple {};

// ---- count_tt -----------------------------------------------------------------------------

/// count_tt<type_tuple<Ts...>, T>::value is the number of elements of Ts... that are exactly T.
template <typename TupleT, typename NeedleT>
struct count_tt;

template <typename NeedleT>
struct count_tt<type_tuple<>, NeedleT> : std::integral_constant<std::size_t, 0> {};

template <typename HeadT, typename... TailTs, typename NeedleT>
struct count_tt<type_tuple<HeadT, TailTs...>, NeedleT>
    : std::integral_constant<std::size_t,
                             (std::is_same<HeadT, NeedleT>::value ? 1 : 0) +
                                 count_tt<type_tuple<TailTs...>, NeedleT>::value> {};

// ---- size_tt ------------------------------------------------------------------------------

/// size_tt<type_tuple<Ts...>>::value is the number of types in the list.
template <typename TupleT>
struct size_tt;

template <typename... Types>
struct size_tt<type_tuple<Types...>> : std::integral_constant<std::size_t, sizeof...(Types)> {};

// ---- split_tt -----------------------------------------------------------------------------

/// Splits a type list at the first occurrence of a separator type.
/// `first_type` holds the types before the separator and `second_type` the types after it.
/// When the separator does not occur, `first_type` is empty and `second_type` is the whole list.
template <typename TupleT, typename SeparatorT>
struct split_tt;

namespace detail {
template <typename RemainingT, typename SeparatorT, typename SeenT>
struct split_tt_helper1;

// Head differs from the separator: append it to the "seen" list and continue with the tail.
template <typename HeadT, typename... TailTs, typename SeparatorT, typename... SeenTs>
struct split_tt_helper1<type_tuple<HeadT, TailTs...>, SeparatorT, type_tuple<SeenTs...>>
    : split_tt_helper1<type_tuple<TailTs...>, SeparatorT, type_tuple<SeenTs..., HeadT>> {};

// Head is the separator: everything seen so far goes left, the tail goes right.
template <typename SeparatorT, typename... TailTs, typename... SeenTs>
struct split_tt_helper1<type_tuple<SeparatorT, TailTs...>, SeparatorT, type_tuple<SeenTs...>> {
    using first_type = type_tuple<SeenTs...>;
    using second_type = type_tuple<TailTs...>;
};

// List exhausted without meeting the separator.
template <typename SeparatorT, typename... SeenTs>
struct split_tt_helper1<type_tuple<>, SeparatorT, type_tuple<SeenTs...>> {
    using first_type = type_tuple<>;
    using second_type = type_tuple<SeenTs...>;
};
}  // namespace detail

template <typename... Types, typename SeparatorT>
struct split_tt<type_tuple<Types...>, SeparatorT>
    : detail::split_tt_helper1<type_tuple<Types...>, SeparatorT, type_tuple<>> {};

// ---- index_of_tt --------------------------------------------------------------------------

/// index_of_tt<type_tuple<Ts...>, T>::value is the position of the first T in the list,
/// or `npos` when T does not occur.
template <typename TupleT, typename NeedleT>
struct index_of_tt;

static constexpr std::size_t npos = static_cast<std::size_t>(-1);

namespace detail {
template <typename RemainingT, typename NeedleT, std::size_t Position>
struct index_of_tt_helper1;

// Nothing left to inspect: not found.
template <typename NeedleT, std::size_t Position>
struct index_of_tt_helper1<type_tuple<>, NeedleT, Position> : std::integral_constant<std::size_t, npos> {};

// Head matches: the current position is the answer.
template <typename NeedleT, typename... TailTs, std::size_t Position>
struct index_of_tt_helper1<type_tuple<NeedleT, TailTs...>, NeedleT, Position>
    : std::integral_constant<std::size_t, Position> {};

// Head does not match: look at the tail, one position further.
template <typename HeadT, typename... TailTs, typename NeedleT, std::size_t Position>
struct index_of_tt_helper1<type_tuple<HeadT, TailTs...>, NeedleT, Position>
    : index_of_tt_helper1<type_tuple<TailTs...>, NeedleT, Position + 1> {};
}  // namespace detail

template <typename... Types, typename NeedleT>
struct index_of_tt<type_tuple<Types...>, NeedleT>
    : detail::index_of_tt_helper1<type_tuple<Types...>, NeedleT, 0> {};

// ---- remove_tt ----------------------------------------------------------------------------

/// remove_tt<type_tuple<Ts...>, T>::type is the list with every occurrence of T removed.
template <typename TupleT, typename RemovedT>
struct remove_tt;

namespace detail {
template <typename RemainingT, typename RemovedT, typename KeptT>
struct remove_tt_helper1;

// Input consumed: the accumulated list is the result.
template <typename RemovedT, typename... KeptTs>
struct remove_tt_helper1<type_tuple<>, RemovedT, type_tuple<KeptTs...>> {
    using type = type_tuple<KeptTs...>;
};

// Head is the removed type: drop it.
template <typename RemovedT, typename... TailTs, typename... KeptTs>
struct remove_tt_helper1<type_tuple<RemovedT, TailTs...>, RemovedT, type_tuple<KeptTs...>>
    : remove_tt_helper1<type_tuple<TailTs...>, RemovedT, type_tuple<KeptTs...>> {};

// Head is any other type: keep it.
template <typename HeadT, typename... TailTs, typename RemovedT, typename... KeptTs>
struct remove_tt_helper1<type_tuple<HeadT, TailTs...>, RemovedT, type_tuple<KeptTs...>>
    : remove_tt_helper1<type_tuple<TailTs...>, RemovedT, type_tuple<KeptTs..., HeadT>> {};
}  // namespace detail

template <typename... Types, typename RemovedT>
struct remove_tt<type_tuple<Types...>, RemovedT>
    : detail::remove_tt_helper1<type_tuple<Types...>, RemovedT, type_tuple<>> {};

template <typename TupleT, typename RemovedT>
using remove_tt_t = typename remove_tt<TupleT, RemovedT>::type;

// ---- make_vttype_tt -----------------------------------------------------------------------

/// make_vttype_tt<V, type_tuple<Ts...>>::type is V<Ts...>, e.g. std::tuple<Ts...>.
template <template <typename...> class VariadicT, typename TupleT>
struct make_vttype_tt;

template <template <typename...> class VariadicT, typename... Types>
struct make_vttype_tt<VariadicT, type_tuple<Types...>> {
    using type = VariadicT<Types...>;
};

template <template <typename...> class VariadicT, typename TupleT>
using make_vttype_tt_t = typename make_vttype_tt<VariadicT, TupleT>::type;

// ---- make_indexer_tt ----------------------------------------------------------------------

/// make_indexer_tt<type_tuple<Ts...>>::type is index_tuple<0, 1, ..., sizeof...(Ts) - 1>.
template <typename TupleT>
struct make_indexer_tt;

namespace detail {
template <typename RemainingT, std::size_t Next, typename IndicesT>
struct make_indexer_tt_helper1;

// No types left: the accumulated index list is the result.
template <std::size_t Next, typename IndicesT>
struct make_indexer_tt_helper1<type_tuple<>, Next, IndicesT> {
    using type = IndicesT;
};

// Consume one type and append its index.
template <typename HeadT, typename... TailTs, std::size_t Next, std::size_t... Indices>
struct make_indexer_tt_helper1<type_tuple<HeadT, TailTs...>, Next, index_tuple<Indices...>>
    : make_indexer_tt_helper1<type_tuple<TailTs...>, Next + 1, index_tuple<Indices..., Next>> {};
}  // namespace detail

template <typename... Types>
struct make_indexer_tt<type_tuple<Types...>>
    : detail::make_indexer_tt_helper1<type_tuple<Types...>, 0, index_tuple<>> {};

template <typename TupleT>
using make_indexer_tt_t = typename make_indexer_tt<TupleT>::type;

// ---- make_partially_defaulted_std_tuple ---------------------------------------------------

namespace detail {
/// Yields the argument itself for positions below DefaultedStartPos and
/// DefaultValSelectorTTy<decayed argument type>::value for the remaining positions.
template <template <typename> class DefaultValSelectorTTy,
          std::size_t DefaultedStartPos,
          std::size_t Idx,
          typename ArgTy>
constexpr typename std::decay<ArgTy>::type select_arg_or_default(ArgTy&& arg) {
    return (Idx >= DefaultedStartPos) ? DefaultValSelectorTTy<typename std::decay<ArgTy>::type>::value
                                      : std::forward<ArgTy>(arg);
}

/// Pairs every argument with its index and builds the tuple element by element.
template <template <typename> class DefaultValSelectorTTy,
          std::size_t DefaultedStartPos,
          std::size_t... Idxs,
          typename... ArgTys>
constexpr std::tuple<typename std::decay<ArgTys>::type...> make_partially_defaulted_std_tuple(
    index_tuple<Idxs...>&&,
    ArgTys&&... args) {
    return std::make_tuple(
        select_arg_or_default<DefaultValSelectorTTy, DefaultedStartPos, Idxs>(std::forward<ArgTys>(args))...);
}
}  // namespace detail

/// Builds a std::tuple of the decayed argument types in which the elements at positions
/// >= DefaultedStartPos are replaced by DefaultValSelectorTTy<element type>::value.
template <template <typename> class DefaultValSelectorTTy, std::size_t DefaultedStartPos, typename... ArgTys>
constexpr std::tuple<typename std::decay<ArgTys>::type...> make_partially_defaulted_std_tuple(ArgTys&&... args) {
    return detail::make_partially_defaulted_std_tuple<DefaultValSelectorTTy, DefaultedStartPos>(
        make_indexer_tt_t<type_tuple<ArgTys...>>(),
        std::forward<ArgTys>(args)...);
}

}  // namespace mputils
|
||||
|
||||
/// Empty tag type. Placed in the selector list of kd_selector_t, it marks the boundary between
/// the required selectors (before it) and the optional ones (after it).
struct kd_optional_selector_t {};
|
||||
|
||||
/// Supplies the value that stands in for a defaulted selector: zero converted to Ty.
template <typename Ty>
struct kd_default_value_selector {
    static constexpr Ty value = static_cast<Ty>(0);
};

// Namespace-scope definition of the static member. Before C++17 a static constexpr data member
// that is odr-used (for example bound to a reference, or selected as an lvalue operand of a
// conditional expression) needs this definition, otherwise the program may fail to link.
// From C++17 on the member is implicitly inline and this line is a permitted redundant redeclaration.
template <typename Ty>
constexpr Ty kd_default_value_selector<Ty>::value;
|
||||
|
||||
template <typename KernelDataTy, typename OuterTy, std::size_t ReqSelectorCount, typename SelectorsTupleTy>
|
||||
class kd_selector;
|
||||
|
||||
template <typename KernelDataTy, typename OuterTy, std::size_t ReqSelectorCount, typename... SelectorTys>
|
||||
class kd_selector<KernelDataTy, OuterTy, ReqSelectorCount, mputils::type_tuple<SelectorTys...>> {
|
||||
using _selector_types = mputils::type_tuple<SelectorTys...>;
|
||||
static_assert(mputils::count_tt<_selector_types, kd_optional_selector_t>::value == 0,
|
||||
"Optional selectors separator can be specified only in template alias. "
|
||||
"Please do not use this class directly - use kd_selector_t alias instead.");
|
||||
static_assert(mputils::size_tt<_selector_types>::value > 0, "At least one selector type must be specified.");
|
||||
static_assert(ReqSelectorCount <= mputils::size_tt<_selector_types>::value,
|
||||
"Number of required selectors is invalid.");
|
||||
|
||||
public:
|
||||
using key_type = mputils::make_vttype_tt_t<std::tuple, _selector_types>;
|
||||
|
||||
using hash_type = std::hash<key_type>;
|
||||
using mapped_type = KernelDataTy (*)(const OuterTy&);
|
||||
using map_type = std::unordered_map<key_type, mapped_type, hash_type>;
|
||||
using value_type = typename map_type::value_type;
|
||||
|
||||
private:
|
||||
map_type _kernel_map;
|
||||
|
||||
template <std::size_t Idx>
|
||||
KernelDataTy _get_kernel(mputils::index_tuple<Idx>&&, const OuterTy& outer, const SelectorTys&... selectors) {
|
||||
auto value = _kernel_map.find(
|
||||
mputils::make_partially_defaulted_std_tuple<kd_default_value_selector, Idx - 1>(selectors...));
|
||||
if (value == _kernel_map.end())
|
||||
return _get_kernel(mputils::index_tuple<Idx - 1>(), outer, selectors...);
|
||||
|
||||
return value->second(outer);
|
||||
}
|
||||
|
||||
static KernelDataTy _get_kernel(mputils::index_tuple<ReqSelectorCount>&&, const OuterTy&, const SelectorTys&...) {
|
||||
throw std::runtime_error("ERROR: no default element in map for kernel data!!!");
|
||||
}
|
||||
|
||||
public:
|
||||
kd_selector(const std::initializer_list<value_type>& l) : _kernel_map(l) {}
|
||||
|
||||
KernelDataTy get_kernel(const OuterTy& outer, const SelectorTys&... selectors) {
|
||||
return _get_kernel(mputils::index_tuple<sizeof...(SelectorTys) + 1>(), outer, selectors...);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename KernelDataTy, typename OuterTy, typename... SelectorTys>
|
||||
using kd_selector_t =
|
||||
kd_selector<KernelDataTy,
|
||||
OuterTy,
|
||||
mputils::index_of_tt<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>::value !=
|
||||
mputils::npos
|
||||
? mputils::index_of_tt<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>::value
|
||||
: sizeof...(SelectorTys),
|
||||
mputils::remove_tt_t<mputils::type_tuple<SelectorTys...>, kd_optional_selector_t>>;
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,84 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "lrn_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "lrn/lrn_kernel_selector.h"
|
||||
#include "lrn/lrn_kernel_base.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// lrn primitive implementation built on typed_primitive_gpu_impl.
struct lrn_gpu : typed_primitive_gpu_impl<lrn> {
    using parent = typed_primitive_gpu_impl<lrn>;
    using parent::parent;

    /// Returns a copy of this implementation wrapped in a unique_ptr.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<lrn_gpu>(*this); }

    /// Copies the lrn settings of @p arg into kernel-selector parameters, queries the lrn kernel
    /// selector and wraps the first returned kernel in a newly allocated lrn_gpu.
    static primitive_impl* create(const lrn_node& arg) {
        auto params = get_default_params<kernel_selector::lrn_params>(arg);
        auto optional_params = get_default_optional_params<kernel_selector::lrn_optional_params>(arg.get_program());

        const auto& desc = arg.get_primitive();
        const bool within_channel = desc->norm_region == lrn_norm_region_within_channel;

        params.alpha = desc->alpha;
        params.beta = desc->beta;
        params.k = desc->k;
        params.localSize = desc->size;
        // The divider mode is always FIXED; the normalization region follows the primitive.
        params.divMode = kernel_selector::kernel_divider_mode::FIXED;
        params.normMode = within_channel ? kernel_selector::lrn_mode::WITHIN_CHANNEL
                                         : kernel_selector::lrn_mode::ACROSS_CHANNEL;

        auto candidates = kernel_selector::lrn_kernel_selector::Instance().GetBestKernels(params, optional_params);

        // NOTE(review): CLDNN_ERROR_BOOL presumably aborts creation when the condition is true;
        // the indexing below relies on the list being non-empty - confirm in error_handler.hpp.
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new lrn_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers lrn_gpu::create in implementation_map<lrn> for the ocl engine:
/// f32, f16, u8 and i8 data in the yxfb, bfyx, byxf, b_fs_yx_fsv4 and b_fs_yx_fsv16 formats.
attach_lrn_gpu::attach_lrn_gpu() {
    // Formats are the outer loop and data types the inner one, which yields the same
    // registration order as listing the twenty combinations one by one.
    for (const auto fmt :
         {format::yxfb, format::bfyx, format::byxf, format::b_fs_yx_fsv4, format::b_fs_yx_fsv16}) {
        for (const auto dt : {data_types::f32, data_types::f16, data_types::u8, data_types::i8}) {
            implementation_map<lrn>::add(std::make_tuple(engine_types::ocl, dt, fmt), lrn_gpu::create);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,32 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "mutable_data_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// mutable_data primitive implementation built on typed_primitive_gpu_impl.
/// No kernel is selected for it: the implementation is constructed from `{}`.
struct mutable_data_gpu : typed_primitive_gpu_impl<mutable_data> {
    using parent = typed_primitive_gpu_impl<mutable_data>;
    using parent::parent;

    /// Returns a copy of this implementation wrapped in a unique_ptr.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<mutable_data_gpu>(*this); }

    /// Allocates a mutable_data_gpu for @p arg, passing `{}` as the second constructor argument.
    static primitive_impl* create(const mutable_data_node& arg) {
        return new mutable_data_gpu(arg, {});
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers mutable_data_gpu::create in implementation_map<mutable_data> under the ocl engine type.
/// Unlike the other attach functions, the key here is the engine type alone.
attach_mutable_data_gpu::attach_mutable_data_gpu() {
    implementation_map<mutable_data>::add({
        {engine_types::ocl, mutable_data_gpu::create},
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,106 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "mvn_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "mvn/mvn_kernel_selector.h"
|
||||
#include "mvn/mvn_kernel_base.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// mvn primitive implementation built on typed_primitive_gpu_impl.
struct mvn_gpu : typed_primitive_gpu_impl<mvn> {
    using parent = typed_primitive_gpu_impl<mvn>;
    using parent::parent;

    /// Returns a copy of this implementation wrapped in a unique_ptr.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<mvn_gpu>(*this); }

    /// Copies the mvn settings of @p arg into kernel-selector parameters, queries the mvn kernel
    /// selector and wraps the first returned kernel in a newly allocated mvn_gpu.
    static primitive_impl* create(const mvn_node& arg) {
        auto params = get_default_params<kernel_selector::mvn_params>(arg);
        auto optional_params = get_default_optional_params<kernel_selector::mvn_optional_params>(arg.get_program());

        const auto desc = arg.get_primitive();

        // Boolean flags of the primitive are translated to the kernel selector's enums.
        params.mvnMode = desc->across_channels ? kernel_selector::mvn_mode::ACROSS_CHANNELS
                                               : kernel_selector::mvn_mode::WITHIN_CHANNELS;
        params.mvnNormalizeVariance = desc->normalize_variance;
        params.epsilon = desc->epsilon;
        params.mvnEpsMode = desc->eps_inside_sqrt ? kernel_selector::mvn_eps_mode::INSIDE_SQRT
                                                  : kernel_selector::mvn_eps_mode::OUTSIDE_SQRT;

        auto candidates = kernel_selector::mvn_kernel_selector::Instance().GetBestKernels(params, optional_params);

        // NOTE(review): CLDNN_ERROR_BOOL presumably aborts creation when the condition is true;
        // the indexing below relies on the list being non-empty - confirm in error_handler.hpp.
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new mvn_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers mvn_gpu::create in implementation_map<mvn> for the ocl engine.
/// bfyx, bfzyx, b_fs_zyx_fsv16 and b_fs_yx_fsv16 are registered for f32, f16, u8 and i8;
/// yxfb, byxf and bs_fs_zyx_bsv16_fsv16 for f32 and f16 only.
attach_mvn_gpu::attach_mvn_gpu() {
    // Registers the two floating-point types for one format.
    const auto add_float_types = [](format::type fmt) {
        for (const auto dt : {data_types::f32, data_types::f16}) {
            implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, dt, fmt), mvn_gpu::create);
        }
    };
    // Registers the floating-point types followed by u8 and i8 for one format.
    const auto add_all_types = [&add_float_types](format::type fmt) {
        add_float_types(fmt);
        for (const auto dt : {data_types::u8, data_types::i8}) {
            implementation_map<mvn>::add(std::make_tuple(engine_types::ocl, dt, fmt), mvn_gpu::create);
        }
    };

    add_all_types(format::bfyx);
    add_float_types(format::yxfb);
    add_float_types(format::byxf);
    add_all_types(format::bfzyx);
    add_all_types(format::b_fs_zyx_fsv16);
    add_float_types(format::bs_fs_zyx_bsv16_fsv16);
    add_all_types(format::b_fs_yx_fsv16);
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,93 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "normalize_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "normalize/normalize_kernel_selector.h"
|
||||
#include "normalize/normalize_kernel_base.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// normalize primitive implementation built on typed_primitive_gpu_impl.
struct normalize_gpu : typed_primitive_gpu_impl<normalize> {
    using parent = typed_primitive_gpu_impl<normalize>;
    using parent::parent;

    /// Returns a copy of this implementation wrapped in a unique_ptr.
    std::unique_ptr<primitive_impl> clone() const override { return make_unique<normalize_gpu>(*this); }

protected:
    /// Extends the arguments produced by the base class with the instance's scale memory.
    kernel_arguments_data get_arguments(typed_primitive_inst<normalize>& instance, int32_t split) const override {
        kernel_arguments_data kernel_args = parent::get_arguments(instance, split);
        kernel_args.scale_table = instance.scale_memory();
        return kernel_args;
    }

public:
    /// Copies the normalize settings of @p arg into kernel-selector parameters, queries the
    /// normalize kernel selector and wraps the first returned kernel in a new normalize_gpu.
    static primitive_impl* create(const normalize_node& arg) {
        auto params = get_default_params<kernel_selector::normalize_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::normalize_optional_params>(arg.get_program());

        const auto& scale_layout = arg.scale().get_output_layout();
        const auto desc = arg.get_primitive();

        params.normMode = desc->across_spatial ? kernel_selector::normalize_mode::ACROSS_SPATIAL
                                               : kernel_selector::normalize_mode::WITHIN_SPATIAL;
        params.epsilon = desc->epsilon;
        // The scale tensor is passed to the kernel with feature and spatial dimensions flattened.
        params.scaleTable = convert_data_tensor(scale_layout).FlattenFeatureAndSpatials();

        auto candidates =
            kernel_selector::normalize_kernel_selector::Instance().GetBestKernels(params, optional_params);

        // NOTE(review): CLDNN_ERROR_BOOL presumably aborts creation when the condition is true;
        // the indexing below relies on the list being non-empty - confirm in error_handler.hpp.
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new normalize_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Registers normalize_gpu::create in implementation_map<normalize> for the ocl engine:
/// f32, f16, i8 and u8 data in the bfyx, yxfb and byxf formats.
attach_normalize_gpu::attach_normalize_gpu() {
    // Formats are the outer loop and data types the inner one, which yields the same
    // registration order as listing the twelve combinations one by one.
    for (const auto fmt : {format::bfyx, format::yxfb, format::byxf}) {
        for (const auto dt : {data_types::f32, data_types::f16, data_types::i8, data_types::u8}) {
            implementation_map<normalize>::add(std::make_tuple(engine_types::ocl, dt, fmt), normalize_gpu::create);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,74 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "one_hot_inst.h"
|
||||
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "one_hot/one_hot_kernel_selector.h"
|
||||
#include "one_hot/one_hot_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief OCL implementation of the one_hot primitive; the kernel is picked through
///        kernel_selector::one_hot_kernel_selector.
struct one_hot_gpu : typed_primitive_gpu_impl<one_hot> {
    using parent = typed_primitive_gpu_impl<one_hot>;
    using parent::parent;

    /// @brief Returns a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<one_hot_gpu>(*this);
    }

    /// @brief Fills the kernel parameters from @p arg and wraps the first of the best kernels.
    /// @param arg Program node of the one_hot primitive.
    /// @return Implementation allocated with `new`; the caller receives the raw pointer.
    /// @note CLDNN_ERROR_BOOL reports the case where the selector returns no kernel.
    static primitive_impl* create(const one_hot_node& arg) {
        auto params = get_default_params<kernel_selector::one_hot_params>(arg, 1);
        auto optional_params =
            get_default_optional_params<kernel_selector::one_hot_optional_params>(arg.get_program());

        const auto prim = arg.get_primitive();
        params.one_hot_axis = prim->one_hot_axis;
        params.on_value = prim->on_value;
        params.off_value = prim->off_value;

        // Read the output extents in bfzyx order when the output format is bfzyx,
        // in bfyx order for everything else.
        const auto out_layout = arg.get_output_layout();
        const auto dims = out_layout.format == format::bfzyx ? out_layout.size.sizes(format::bfzyx)
                                                             : out_layout.size.sizes(format::bfyx);

        // The limit is the output extent along the one-hot axis.
        params.one_hot_limit = dims[params.one_hot_axis];

        auto& selector = kernel_selector::one_hot_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with these arguments");

        return new one_hot_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers one_hot_gpu::create as the OCL factory of the one_hot primitive.
///
/// Covers the formats bfyx and bfzyx, each with the data types
/// i8, u8, i32, i64, f32 and f16 (format-major order).
attach_one_hot_gpu::attach_one_hot_gpu() {
    const auto factory = one_hot_gpu::create;

    for (const auto fmt : {format::bfyx, format::bfzyx}) {
        for (const auto dt : {data_types::i8,
                              data_types::u8,
                              data_types::i32,
                              data_types::i64,
                              data_types::f32,
                              data_types::f16}) {
            implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,174 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "quantize_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "quantize/quantize_kernel_selector.h"
|
||||
#include "quantize/quantize_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief OCL implementation of the quantize primitive; the kernel is picked through
///        kernel_selector::quantize_kernel_selector.
struct quantize_gpu : typed_primitive_gpu_impl<quantize> {
    using parent = typed_primitive_gpu_impl<quantize>;
    using parent::parent;

    /// @brief Returns a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<quantize_gpu>(*this);
    }

protected:
    /// @brief Collects the memory objects bound to the kernel for one execution.
    /// @param instance Primitive instance whose input/output memories are bound.
    /// @return Arguments holding every input memory, optionally dependencies 5..8,
    ///         and the output memory.
    kernel_arguments_data get_arguments(typed_primitive_inst<quantize>& instance, int32_t) const override {
        kernel_arguments_data kernel_args;

        for (size_t idx = 0; idx < instance.inputs_memory_count(); ++idx) {
            kernel_args.inputs.push_back(instance.input_memory_ptr(idx));
        }

        // With the scale/shift optimization enabled and exactly nine dependencies,
        // dependencies 5..8 are passed to the kernel as additional inputs.
        const bool pass_extra_deps =
            instance.node.get_scale_shift_opt() && instance.node.get_dependencies().size() == 9;
        if (pass_extra_deps) {
            for (size_t dep = 5; dep <= 8; ++dep) {
                kernel_args.inputs.push_back(instance.dep_memory_ptr(dep));
            }
        }

        kernel_args.output = instance.output_memory_ptr();
        return kernel_args;
    }

public:
    /// @brief Fills the kernel parameters from @p arg and wraps the first of the best kernels.
    /// @param arg Program node of the quantize primitive.
    /// @return Implementation allocated with `new`; the caller receives the raw pointer.
    /// @note CLDNN_ERROR_BOOL reports the case where the selector returns no kernel.
    static primitive_impl* create(const quantize_node& arg) {
        auto params = get_default_params<kernel_selector::quantize_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::quantize_optional_params>(arg.get_program());

        // General quantization configuration.
        params.levels = arg.get_levels();
        params.packed_binary_output = arg.get_packed_binary_output();
        params.scale_shift_opt = arg.get_scale_shift_opt();
        params.has_post_scale = arg.get_need_post_scale();
        params.has_post_shift = arg.get_need_post_shift();
        params.has_pre_shift = arg.get_need_pre_shift();
        params.has_clamp = arg.get_need_clamp();

        // Per-tensor flags reported by the node.
        params.per_tensor_input_range = arg.get_per_tensor_input_range();
        params.per_tensor_input_scale = arg.get_per_tensor_input_scale();
        params.per_tensor_input_shift = arg.get_per_tensor_input_shift();
        params.per_tensor_output_scale = arg.get_per_tensor_output_scale();
        params.per_tensor_output_shift = arg.get_per_tensor_output_shift();

        // Scalar values reported by the node.
        params.in_lo = arg.get_input_lo_val();
        params.in_hi = arg.get_input_hi_val();
        params.in_scale = arg.get_input_scale_val();
        params.in_shift = arg.get_input_shift_val();
        params.out_scale = arg.get_output_scale_val();
        params.out_shift = arg.get_output_shift_val();

        // Inputs from index 1 onwards are appended here; index 0 is not touched by this loop.
        for (size_t idx = 1; idx < arg.inputs_count(); ++idx) {
            params.inputs.push_back(convert_data_tensor(arg.input(idx).get_output_layout()));
        }

        const auto& out_layout = arg.get_output_layout();
        params.output = convert_data_tensor(out_layout);

        auto& selector = kernel_selector::quantize_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new quantize_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers quantize_gpu::create as the OCL factory of the quantize primitive.
///
/// Each (data type, format) key is registered exactly once. The supported
/// combinations fall into three groups:
///  - f32 / f16 / u8 / i8:       fs_b_yx_fsv32, b_fs_yx_fsv16, b_fs_yx_fsv4, b_fs_yx_fsv32,
///                               b_fs_zyx_fsv32, bs_fs_yx_bsv16_fsv16, bs_fs_zyx_bsv16_fsv16
///  - f32 / f16 / i32 / u8 / i8: bfyx, byxf, bfzyx, bfwzyx
///  - f32 / f16:                 yxfb, b_fs_zyx_fsv16
///
/// NOTE(review): the set of keys equals the set produced by the former unrolled
/// list, which repeated several byxf and bs_fs_yx_bsv16_fsv16 entries. This
/// assumes implementation_map stores one factory per key -- confirm against
/// implementation_map.h.
attach_quantize_gpu::attach_quantize_gpu() {
    const auto factory = quantize_gpu::create;

    // Floating point and 8-bit integer data.
    for (const auto fmt : {format::fs_b_yx_fsv32,
                           format::b_fs_yx_fsv16,
                           format::b_fs_yx_fsv4,
                           format::b_fs_yx_fsv32,
                           format::b_fs_zyx_fsv32,
                           format::bs_fs_yx_bsv16_fsv16,
                           format::bs_fs_zyx_bsv16_fsv16}) {
        for (const auto dt : {data_types::f32, data_types::f16, data_types::u8, data_types::i8}) {
            implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }

    // Same as above plus i32.
    for (const auto fmt : {format::bfyx, format::byxf, format::bfzyx, format::bfwzyx}) {
        for (const auto dt : {data_types::f32,
                              data_types::f16,
                              data_types::i32,
                              data_types::u8,
                              data_types::i8}) {
            implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }

    // Floating point only.
    for (const auto fmt : {format::yxfb, format::b_fs_zyx_fsv16}) {
        for (const auto dt : {data_types::f32, data_types::f16}) {
            implementation_map<quantize>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,86 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#include "register_gpu.hpp"
|
||||
|
||||
namespace cldnn { namespace gpu {
|
||||
|
||||
#define REGISTER_GPU(prim) \
|
||||
static detail::attach_##prim##_gpu attach_##prim
|
||||
|
||||
void register_implementations_gpu() {
|
||||
REGISTER_GPU(activation);
|
||||
REGISTER_GPU(arg_max_min);
|
||||
REGISTER_GPU(average_unpooling);
|
||||
REGISTER_GPU(binary_convolution);
|
||||
REGISTER_GPU(border);
|
||||
REGISTER_GPU(broadcast);
|
||||
REGISTER_GPU(concatenation);
|
||||
REGISTER_GPU(condition);
|
||||
REGISTER_GPU(convolution);
|
||||
REGISTER_GPU(crop);
|
||||
REGISTER_GPU(custom_gpu_primitive);
|
||||
REGISTER_GPU(data);
|
||||
REGISTER_GPU(deconvolution);
|
||||
REGISTER_GPU(deformable_conv);
|
||||
REGISTER_GPU(deformable_interp);
|
||||
REGISTER_GPU(depth_to_space);
|
||||
REGISTER_GPU(batch_to_space);
|
||||
REGISTER_GPU(detection_output);
|
||||
REGISTER_GPU(eltwise);
|
||||
REGISTER_GPU(fully_connected);
|
||||
REGISTER_GPU(gather);
|
||||
REGISTER_GPU(gather_nd);
|
||||
REGISTER_GPU(gemm);
|
||||
REGISTER_GPU(input_layout);
|
||||
REGISTER_GPU(lrn);
|
||||
REGISTER_GPU(lstm_gemm);
|
||||
REGISTER_GPU(lstm_elt);
|
||||
REGISTER_GPU(max_unpooling);
|
||||
REGISTER_GPU(mutable_data);
|
||||
REGISTER_GPU(mvn);
|
||||
REGISTER_GPU(normalize);
|
||||
REGISTER_GPU(one_hot);
|
||||
REGISTER_GPU(permute);
|
||||
REGISTER_GPU(pooling);
|
||||
REGISTER_GPU(prior_box);
|
||||
REGISTER_GPU(proposal);
|
||||
REGISTER_GPU(pyramid_roi_align);
|
||||
REGISTER_GPU(quantize);
|
||||
REGISTER_GPU(reduce);
|
||||
REGISTER_GPU(region_yolo);
|
||||
REGISTER_GPU(reorder);
|
||||
REGISTER_GPU(reorg_yolo);
|
||||
REGISTER_GPU(reshape);
|
||||
REGISTER_GPU(reverse_sequence);
|
||||
REGISTER_GPU(roi_pooling);
|
||||
REGISTER_GPU(scale);
|
||||
REGISTER_GPU(scatter_update);
|
||||
REGISTER_GPU(scatter_nd_update);
|
||||
REGISTER_GPU(scatter_elements_update);
|
||||
REGISTER_GPU(select);
|
||||
REGISTER_GPU(shuffle_channels);
|
||||
REGISTER_GPU(softmax);
|
||||
REGISTER_GPU(space_to_batch);
|
||||
REGISTER_GPU(space_to_depth);
|
||||
REGISTER_GPU(strided_slice);
|
||||
REGISTER_GPU(tile);
|
||||
REGISTER_GPU(fused_conv_eltwise);
|
||||
REGISTER_GPU(lstm_dynamic_input);
|
||||
REGISTER_GPU(lstm_dynamic_timeloop);
|
||||
REGISTER_GPU(generic_layer);
|
||||
REGISTER_GPU(gather_tree);
|
||||
REGISTER_GPU(resample);
|
||||
REGISTER_GPU(non_max_suppression);
|
||||
REGISTER_GPU(grn);
|
||||
REGISTER_GPU(ctc_greedy_decoder);
|
||||
REGISTER_GPU(cum_sum);
|
||||
REGISTER_GPU(embedding_bag);
|
||||
REGISTER_GPU(extract_image_patches);
|
||||
REGISTER_GPU(loop);
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,144 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "scale_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "eltwise/eltwise_kernel_selector.h"
|
||||
#include "eltwise/eltwise_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief OCL implementation of the scale primitive, expressed as an eltwise kernel:
///        a MUL of input and scale, followed by an ADD of the bias when one is present.
struct scale_gpu : typed_primitive_gpu_impl<scale> {
    using parent = typed_primitive_gpu_impl<scale>;
    using parent::parent;

    /// @brief Returns a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<scale_gpu>(*this);
    }

protected:
    /// @brief Collects the memory objects bound to the kernel for one execution.
    /// @param instance Primitive instance whose memories are bound.
    /// @param split    Forwarded unchanged to the parent implementation.
    /// @return Parent arguments with inputs replaced by {input, scale[, bias]} and
    ///         output replaced by the instance output memory.
    kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t split) const override {
        kernel_arguments_data kernel_args = parent::get_arguments(instance, split);

        kernel_args.inputs = {instance.input_memory_ptr(), instance.scale_memory()};
        kernel_args.output = instance.output_memory_ptr();

        // The bias memory is bound as the third input only when the node has a bias term.
        if (_outer.bias_term()) {
            kernel_args.inputs.push_back(instance.bias_memory());
        }

        return kernel_args;
    }

public:
    /// @brief Fills the eltwise kernel parameters from @p arg and wraps the first of the best kernels.
    /// @param arg Program node of the scale primitive.
    /// @return Implementation allocated with `new`; the caller receives the raw pointer.
    /// @note CLDNN_ERROR_BOOL reports the case where the selector returns no kernel.
    static primitive_impl* create(const scale_node& arg) {
        using input_kind = kernel_selector::eltwise_params::InputType;

        auto params = get_default_params<kernel_selector::eltwise_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());

        // Operation 0: Buffer(0) * Buffer(1), where the scale tensor is appended to inputs here.
        params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout()));
        params.operations.push_back({{input_kind::Buffer(0), input_kind::Buffer(1)},
                                     kernel_selector::eltwise_mode::MUL});

        // Operation 1 (optional): Intermediate(0) + Buffer(2), where the bias tensor is appended here.
        if (arg.bias_term()) {
            params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout()));
            params.operations.push_back({{input_kind::Intermediate(0), input_kind::Buffer(2)},
                                         kernel_selector::eltwise_mode::ADD});
        }

        params.layoutBased = true;

        auto& selector = kernel_selector::eltwise_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new scale_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers scale_gpu::create as the OCL factory of the scale primitive.
///
/// The loops below walk the supported (format, data type) pairs format by format.
/// Note that fs_b_yx_fsv32 is registered for f16 and i32 only.
attach_scale_gpu::attach_scale_gpu() {
    const auto factory = scale_gpu::create;

    // Type sets shared by several formats.
    const auto fp_and_i32 = {data_types::f32, data_types::f16, data_types::i32};
    const auto fp_i32_u8_i8 = {data_types::f32, data_types::f16, data_types::i32, data_types::u8, data_types::i8};

    for (const auto fmt : {format::yxfb, format::byxf}) {
        for (const auto dt : fp_and_i32) {
            implementation_map<scale>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }

    for (const auto fmt : {format::bfyx, format::bfzyx, format::bfwzyx, format::b_fs_yx_fsv16}) {
        for (const auto dt : fp_i32_u8_i8) {
            implementation_map<scale>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }

    for (const auto dt : {data_types::f32, data_types::f16, data_types::i32, data_types::i8, data_types::u8}) {
        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, dt, format::b_fs_zyx_fsv16), factory);
    }

    for (const auto dt : fp_and_i32) {
        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, dt, format::bs_fs_zyx_bsv16_fsv16), factory);
    }

    for (const auto dt : {data_types::f16, data_types::i32}) {
        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, dt, format::fs_b_yx_fsv32), factory);
    }

    for (const auto dt : fp_and_i32) {
        implementation_map<scale>::add(std::make_tuple(engine_types::ocl, dt, format::bs_fs_yx_bsv16_fsv16), factory);
    }

    for (const auto fmt : {format::b_fs_yx_fsv4, format::b_fs_yx_fsv32, format::b_fs_zyx_fsv32}) {
        for (const auto dt : {data_types::u8, data_types::i8, data_types::f16, data_types::f32, data_types::i32}) {
            implementation_map<scale>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,70 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "select_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "select/select_kernel_selector.h"
|
||||
#include "select/select_kernel_base.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief OCL implementation of the select primitive; the kernel is picked through
///        kernel_selector::select_kernel_selector.
struct select_gpu : typed_primitive_gpu_impl<select> {
    using parent = typed_primitive_gpu_impl<select>;
    using parent::parent;

    /// @brief Returns a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<select_gpu>(*this);
    }

public:
    /// @brief Fills the kernel parameters from @p arg and wraps the first of the best kernels.
    /// @param arg Program node of the select primitive.
    /// @return Implementation allocated with `new`; the caller receives the raw pointer.
    /// @note CLDNN_ERROR_BOOL reports the case where the selector returns no kernel.
    static primitive_impl* create(const select_node& arg) {
        auto params = get_default_params<kernel_selector::select_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::select_optional_params>(arg.get_program());

        // Inputs from index 1 onwards are appended here; index 0 is not touched by this loop.
        for (size_t idx = 1; idx < arg.inputs_count(); ++idx) {
            params.inputs.push_back(convert_data_tensor(arg.input(idx).get_output_layout()));
        }

        auto& selector = kernel_selector::select_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new select_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers select_gpu::create as the OCL factory of the select primitive.
///
/// Every combination of the formats yxfb, bfyx, byxf and the data types
/// f32, f16, i8, u8 is registered (format-major order).
attach_select_gpu::attach_select_gpu() {
    for (const auto fmt : {format::yxfb, format::bfyx, format::byxf}) {
        for (const auto dt : {data_types::f32, data_types::f16, data_types::i8, data_types::u8}) {
            implementation_map<select>::add(std::make_tuple(engine_types::ocl, dt, fmt), select_gpu::create);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,85 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "shuffle_channels_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "shuffle_channels/shuffle_channels_kernel_selector.h"
|
||||
#include "shuffle_channels/shuffle_channels_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief OCL implementation of the shuffle_channels primitive; the kernel is picked
///        through kernel_selector::shuffle_channels_kernel_selector.
struct shuffle_channels_gpu : typed_primitive_gpu_impl<shuffle_channels> {
    using parent = typed_primitive_gpu_impl<shuffle_channels>;
    using parent::parent;

    /// @brief Returns a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<shuffle_channels_gpu>(*this);
    }

public:
    /// @brief Fills the kernel parameters from @p arg and wraps the first of the best kernels.
    /// @param arg Program node of the shuffle_channels primitive.
    /// @return Implementation allocated with `new`; the caller receives the raw pointer.
    /// @note CLDNN_ERROR_BOOL reports the case where the selector returns no kernel.
    static primitive_impl* create(const shuffle_channels_node& arg) {
        auto params = get_default_params<kernel_selector::shuffle_channels_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::shuffle_channels_optional_params>(arg.get_program());

        const auto prim = arg.get_primitive();

        // A negative axis is shifted by a fixed dimension count of 4.
        const int32_t dims_count = 4;
        const int32_t requested_axis = prim->axis;
        const int32_t resolved_axis = requested_axis < 0 ? requested_axis + dims_count : requested_axis;

        params.group = prim->group;
        params.axis = resolved_axis;

        auto& selector = kernel_selector::shuffle_channels_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new shuffle_channels_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers shuffle_channels_gpu::create in implementation_map for every
///        supported (engine, data type, format) combination.
attach_shuffle_channels_gpu::attach_shuffle_channels_gpu() {
    const auto factory = shuffle_channels_gpu::create;

    // Formats registered for f32, f16, u8 and i8.
    const data_types all_types[] = {data_types::f32, data_types::f16, data_types::u8, data_types::i8};
    const format::type full_formats[] = {format::bfyx,
                                         format::b_fs_yx_fsv16,
                                         format::b_fs_yx_fsv4,
                                         format::bs_fs_yx_bsv16_fsv16};
    for (const auto fmt : full_formats) {
        for (const auto dt : all_types) {
            implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }

    // fs_b_yx_fsv32 is registered for the floating-point types only.
    const data_types float_types[] = {data_types::f32, data_types::f16};
    for (const auto dt : float_types) {
        implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, dt, format::fs_b_yx_fsv32), factory);
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,77 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "space_to_batch_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "space_to_batch/space_to_batch_kernel_selector.h"
|
||||
#include "space_to_batch/space_to_batch_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "data_inst.h"
|
||||
#include <vector>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
/// @brief GPU implementation wrapper for the space_to_batch primitive.
///
/// Constructors are inherited from typed_primitive_gpu_impl; instances are
/// produced through the static create() factory below.
struct space_to_batch_gpu : typed_primitive_gpu_impl<space_to_batch> {
    using parent = typed_primitive_gpu_impl<space_to_batch>;
    using parent::parent;

    /// @brief Produces a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<space_to_batch_gpu>(*this);
    }

public:
    /// @brief Fills kernel-selector parameters for the node and wraps the first
    ///        kernel returned by the selector.
    /// @param arg Program node describing the space_to_batch primitive.
    /// @return Implementation object allocated with `new` and returned as a raw pointer.
    static primitive_impl* create(const space_to_batch_node& arg) {
        auto params = get_default_params<kernel_selector::space_to_batch_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::space_to_batch_optional_params>(arg.get_program());

        // Copy the block shape and both paddings from the primitive description.
        auto prim = arg.get_primitive();
        params.block_shape = convert_dim_vector(prim->block_shape);
        params.pads_begin = convert_dim_vector(prim->pads_begin);
        params.pads_end = convert_dim_vector(prim->pads_end);

        auto& selector = kernel_selector::space_to_batch_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        // Reports an error through CLDNN_ERROR_BOOL when the selector returned no kernels.
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new space_to_batch_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers space_to_batch_gpu::create in implementation_map for every
///        supported (engine, data type, format) combination.
attach_space_to_batch_gpu::attach_space_to_batch_gpu() {
    const auto factory = space_to_batch_gpu::create;

    // Every listed format is registered for f32, f16, u8 and i8.
    const data_types types[] = {data_types::f32, data_types::f16, data_types::u8, data_types::i8};
    const format::type formats[] = {format::bfyx, format::bfzyx, format::bfwzyx, format::b_fs_zyx_fsv16};

    for (const auto fmt : formats) {
        for (const auto dt : types) {
            implementation_map<space_to_batch>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,79 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "space_to_depth_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "space_to_depth/space_to_depth_kernel_selector.h"
|
||||
#include "space_to_depth/space_to_depth_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
/// @brief GPU implementation wrapper for the space_to_depth primitive.
///
/// Constructors are inherited from typed_primitive_gpu_impl; instances are
/// produced through the static create() factory below.
struct space_to_depth_gpu : typed_primitive_gpu_impl<space_to_depth> {
    using parent = typed_primitive_gpu_impl<space_to_depth>;
    using parent::parent;

    /// @brief Produces a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<space_to_depth_gpu>(*this);
    }

public:
    /// @brief Fills kernel-selector parameters for the node and wraps the first
    ///        kernel returned by the selector.
    /// @param arg Program node describing the space_to_depth primitive.
    /// @return Implementation object allocated with `new` and returned as a raw pointer.
    static primitive_impl* create(const space_to_depth_node& arg) {
        auto params = get_default_params<kernel_selector::space_to_depth_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::space_to_depth_optional_params>(arg.get_program());

        // Any mode other than blocks_first is mapped to DEPTH_FIRST.
        if (arg.get_primitive()->mode == space_to_depth::blocks_first) {
            params.depth_mode = kernel_selector::SpaceToDepthMode::BLOCKS_FIRST;
        } else {
            params.depth_mode = kernel_selector::SpaceToDepthMode::DEPTH_FIRST;
        }

        params.block_size = arg.get_primitive()->block_size;

        auto& selector = kernel_selector::space_to_depth_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        // Reports an error through CLDNN_ERROR_BOOL when the selector returned no kernels.
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new space_to_depth_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers space_to_depth_gpu::create in implementation_map for every
///        supported (engine, data type, format) combination.
attach_space_to_depth_gpu::attach_space_to_depth_gpu() {
    const auto factory = space_to_depth_gpu::create;

    // Every listed format is registered for f32, f16, u8 and i8.
    const data_types types[] = {data_types::f32, data_types::f16, data_types::u8, data_types::i8};
    const format::type formats[] = {format::bfzyx, format::bfyx, format::b_fs_yx_fsv16, format::b_fs_yx_fsv4};

    for (const auto fmt : formats) {
        for (const auto dt : types) {
            implementation_map<space_to_depth>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -1,72 +0,0 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "tile_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "tile/tile_kernel_selector.h"
|
||||
#include "tile/tile_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
|
||||
/// @brief GPU implementation wrapper for the tile primitive.
///
/// Constructors are inherited from typed_primitive_gpu_impl; instances are
/// produced through the static create() factory below.
struct tile_gpu : typed_primitive_gpu_impl<tile> {
    using parent = typed_primitive_gpu_impl<tile>;
    using parent::parent;

    /// @brief Produces a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<tile_gpu>(*this);
    }

public:
    /// @brief Builds default kernel-selector parameters for the node and wraps
    ///        the first kernel returned by the selector.
    /// @param arg Program node describing the tile primitive.
    /// @return Implementation object allocated with `new` and returned as a raw pointer.
    static primitive_impl* create(const tile_node& arg) {
        // Only the default parameters are passed; nothing tile-specific is set here.
        auto params = get_default_params<kernel_selector::tile_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::tile_optional_params>(arg.get_program());

        auto& selector = kernel_selector::tile_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        // Reports an error through CLDNN_ERROR_BOOL when the selector returned no kernels.
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new tile_gpu(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers tile_gpu::create in implementation_map for every supported
///        (engine, data type, format) combination.
///
/// The earlier version repeated the bfyx registrations for i8, u8 and i32 in
/// the block whose f16/f32 entries use bfzyx, so the integer types were never
/// registered for bfzyx. All five data types are now registered uniformly for
/// bfyx, bfzyx and bfwzyx.
attach_tile_gpu::attach_tile_gpu() {
    const auto factory = tile_gpu::create;

    const data_types types[] = {data_types::i8,
                                data_types::u8,
                                data_types::i32,
                                data_types::f16,
                                data_types::f32};
    const format::type formats[] = {format::bfyx, format::bfzyx, format::bfwzyx};

    for (const auto fmt : formats) {
        for (const auto dt : types) {
            implementation_map<tile>::add(std::make_tuple(engine_types::ocl, dt, fmt), factory);
        }
    }
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
@ -51,7 +51,7 @@ void add_required_reorders::run(program_impl& p) {
|
||||
continue; // only nodes with dependencies
|
||||
if (usr->is_type<data>())
|
||||
continue;
|
||||
if (usr->type()->does_an_implementation_exist(p.get_engine(), *usr))
|
||||
if (usr->type()->does_an_implementation_exist(*usr))
|
||||
continue;
|
||||
|
||||
bool correct_layout_selected = false;
|
||||
@ -71,7 +71,7 @@ void add_required_reorders::run(program_impl& p) {
|
||||
node->get_output_layout().format,
|
||||
original_layout.size);
|
||||
usr->set_output_layout(current_layout, false);
|
||||
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
|
||||
if (usr->type()->does_possible_implementation_exist(*usr)) {
|
||||
correct_layout_selected = true;
|
||||
break;
|
||||
} else if (original_layout.data_type == data_types::i64) {
|
||||
@ -80,14 +80,14 @@ void add_required_reorders::run(program_impl& p) {
|
||||
current_layout = original_layout;
|
||||
current_layout.data_type = data_types::i32;
|
||||
usr->set_output_layout(current_layout, false);
|
||||
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
|
||||
if (usr->type()->does_possible_implementation_exist(*usr)) {
|
||||
correct_layout_selected = true;
|
||||
} else {
|
||||
current_layout = original_layout;
|
||||
current_layout.data_type = data_types::i32;
|
||||
current_layout.format = node->get_output_layout().format;
|
||||
usr->set_output_layout(current_layout, false);
|
||||
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
|
||||
if (usr->type()->does_possible_implementation_exist(*usr)) {
|
||||
correct_layout_selected = true;
|
||||
}
|
||||
}
|
||||
@ -148,7 +148,7 @@ void add_required_reorders::run(program_impl& p) {
|
||||
new_layout_format,
|
||||
original_layout.size);
|
||||
usr->set_output_layout(current_layout, false);
|
||||
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
|
||||
if (usr->type()->does_possible_implementation_exist(*usr)) {
|
||||
correct_layout_selected = true;
|
||||
break;
|
||||
}
|
||||
@ -164,7 +164,7 @@ void add_required_reorders::run(program_impl& p) {
|
||||
|
||||
usr->set_output_layout(original_layout_i32, false);
|
||||
|
||||
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
|
||||
if (usr->type()->does_possible_implementation_exist(*usr)) {
|
||||
correct_layout_selected = true;
|
||||
}
|
||||
|
||||
@ -174,7 +174,7 @@ void add_required_reorders::run(program_impl& p) {
|
||||
new_layout_format,
|
||||
original_layout_i32.size);
|
||||
usr->set_output_layout(current_layout_i32, false);
|
||||
if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr)) {
|
||||
if (usr->type()->does_possible_implementation_exist(*usr)) {
|
||||
correct_layout_selected = true;
|
||||
break;
|
||||
}
|
||||
|
@ -42,7 +42,7 @@ void compile_graph::run(program_impl& p) {
|
||||
auto& node = *(std::next(proc_order.begin(), i));
|
||||
node->set_unique_id(std::to_string(i));
|
||||
if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
|
||||
node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
|
||||
node->selected_impl = node->type()->choose_impl(*node);
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -51,7 +51,7 @@ void compile_graph::run(program_impl& p) {
|
||||
#else
|
||||
for (auto& node : p.get_processing_order()) {
|
||||
if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty())) {
|
||||
node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
|
||||
node->selected_impl = node->type()->choose_impl(*node);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -5,7 +5,7 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "pass_manager.h"
|
||||
#include "gpu/primitive_gpu_base.h"
|
||||
#include "impls/ocl/primitive_base.hpp"
|
||||
#include "fully_connected/fully_connected_params.h"
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
@ -45,7 +45,7 @@ void post_input_reorder::run(program_impl& p) {
|
||||
const auto impl = node->get_selected_impl();
|
||||
// add a reorder if primitive's input format doesn't match implementation's input format
|
||||
if (node->is_type<fully_connected>()) {
|
||||
const auto& fc_impl = dynamic_cast<const gpu::typed_primitive_gpu_impl<fully_connected>&>(*impl);
|
||||
const auto& fc_impl = dynamic_cast<const ocl::typed_primitive_impl_ocl<fully_connected>&>(*impl);
|
||||
const auto& fc_params = *static_cast<kernel_selector::fully_connected_params*>(fc_impl._kernel_data.params.get());
|
||||
|
||||
auto layout_format = from_data_layout(fc_params.inputs[0].GetLayout());
|
||||
@ -62,7 +62,7 @@ void post_input_reorder::run(program_impl& p) {
|
||||
reorder.set_unique_id(node->get_unique_id() + "_input_reorder");
|
||||
reorder.get_output_layout(false);
|
||||
node->set_output_layout(previous_layout, false);
|
||||
reorder.set_selected_impl(reorder.type()->choose_impl(p.get_engine(), reorder));
|
||||
reorder.set_selected_impl(reorder.type()->choose_impl(reorder));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -56,7 +56,7 @@ void post_optimize_weights::optimize_weights(T& node, program_impl& p) {
|
||||
// Don't run impl selection to avoid double compilation of reorder kernels
|
||||
// in main program and internal program for constant propagation
|
||||
if (!g_node.is_constant())
|
||||
g_node.selected_impl = g_node.type()->choose_impl(p.get_engine(), g_node);
|
||||
g_node.selected_impl = g_node.type()->choose_impl(g_node);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -27,9 +27,8 @@ void remove_redundant_reorders::run(program_impl& p) {
|
||||
if (!update_implementations)
|
||||
return;
|
||||
|
||||
auto& eng = p.get_engine();
|
||||
node.set_unique_id(node.get_unique_id() + "_reorder");
|
||||
auto new_impl = node.type()->choose_impl(eng, node);
|
||||
auto new_impl = node.type()->choose_impl(node);
|
||||
node.set_selected_impl(std::move(new_impl));
|
||||
};
|
||||
|
||||
@ -300,7 +299,7 @@ void remove_redundant_reorders::run(program_impl& p) {
|
||||
continue;
|
||||
|
||||
input.set_output_layout(output_layout, false);
|
||||
if (input.type()->does_possible_implementation_exist(p.get_engine(), input)) {
|
||||
if (input.type()->does_possible_implementation_exist(input)) {
|
||||
p.replace_all_usages(node, input);
|
||||
p.add_optimized_primitive_info(node.id());
|
||||
p.remove_all_connections(node);
|
||||
|
@ -58,7 +58,10 @@ std::map<program_node*, format::type> get_preferred_formats(program_impl& p, lay
|
||||
continue;
|
||||
|
||||
auto ex = lo.get_preferred_format(*n);
|
||||
auto impl = lo.get_preferred_impl_type(*n);
|
||||
fmt_map[n] = ex;
|
||||
|
||||
n->set_preferred_impl_type(impl);
|
||||
}
|
||||
return fmt_map;
|
||||
}
|
||||
|
@ -4,23 +4,23 @@
|
||||
|
||||
#include "condition_inst.h"
|
||||
#include "network_impl.h"
|
||||
#include "implementation_map.h"
|
||||
#include "register_gpu.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "register.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace common {
|
||||
|
||||
struct condition_gpu : typed_primitive_impl<condition> {
|
||||
struct condition_impl : typed_primitive_impl<condition> {
|
||||
const condition_node& outer;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<condition_gpu>(*this);
|
||||
return make_unique<condition_impl>(*this);
|
||||
}
|
||||
|
||||
explicit condition_gpu(const condition_node& outer) : outer(outer) {}
|
||||
explicit condition_impl(const condition_node& outer) : outer(outer) {}
|
||||
|
||||
event::ptr execute_impl(const std::vector<event::ptr>& events, condition_inst& instance) override {
|
||||
for (auto& a : events) {
|
||||
@ -42,7 +42,7 @@ struct condition_gpu : typed_primitive_impl<condition> {
|
||||
return ev;
|
||||
}
|
||||
|
||||
static primitive_impl* create(const condition_node& arg) { return new condition_gpu(arg); }
|
||||
static primitive_impl* create(const condition_node& arg) { return new condition_impl(arg); }
|
||||
|
||||
void init_kernels() override {}
|
||||
|
||||
@ -117,13 +117,13 @@ private:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_condition_gpu::attach_condition_gpu() {
|
||||
implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
condition_gpu::create);
|
||||
implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
|
||||
condition_gpu::create);
|
||||
attach_condition_common::attach_condition_common() {
|
||||
implementation_map<condition>::add(impl_types::common, condition_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace common
|
||||
} // namespace cldnn
|
@ -5,25 +5,25 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#include "loop_inst.h"
|
||||
#include "network_impl.h"
|
||||
#include "implementation_map.h"
|
||||
#include "register_gpu.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "register.hpp"
|
||||
#include "mutable_data_inst.h"
|
||||
#include "input_layout_inst.h"
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
struct loop_gpu : typed_primitive_impl<loop> {
|
||||
namespace common {
|
||||
struct loop_impl : typed_primitive_impl<loop> {
|
||||
const loop_node& node;
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<loop_gpu>(*this);
|
||||
return make_unique<loop_impl>(*this);
|
||||
}
|
||||
|
||||
void init_kernels() override {}
|
||||
|
||||
loop_gpu(const loop_gpu& other) : typed_primitive_impl<loop>(other), node(other.node) {}
|
||||
explicit loop_gpu(const loop_node& node) : node(node) {}
|
||||
loop_impl(const loop_impl& other) : typed_primitive_impl<loop>(other), node(other.node) {}
|
||||
explicit loop_impl(const loop_node& node) : node(node) {}
|
||||
|
||||
// read scalar value from data primitive
|
||||
static int64_t read_scalar_value(memory::ptr mem, stream& stream) {
|
||||
@ -216,14 +216,14 @@ struct loop_gpu : typed_primitive_impl<loop> {
|
||||
return ev;
|
||||
}
|
||||
|
||||
static primitive_impl* create(const loop_node& arg) { return new loop_gpu(arg); }
|
||||
static primitive_impl* create(const loop_node& arg) { return new loop_impl(arg); }
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
attach_loop_gpu::attach_loop_gpu() {
|
||||
implementation_map<loop>::add({{engine_types::ocl, loop_gpu::create}});
|
||||
attach_loop_common::attach_loop_common() {
|
||||
implementation_map<loop>::add(impl_types::common, loop_impl::create, {});
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace common
|
||||
} // namespace cldnn
|
22
inference-engine/thirdparty/clDNN/src/impls/common/register.cpp
vendored
Normal file
22
inference-engine/thirdparty/clDNN/src/impls/common/register.cpp
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "register.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace common {
|
||||
|
||||
#define REGISTER_COMMON(prim) \
|
||||
static detail::attach_##prim##_common attach_##prim
|
||||
|
||||
void register_implementations() {
|
||||
REGISTER_COMMON(condition);
|
||||
REGISTER_COMMON(data);
|
||||
REGISTER_COMMON(input_layout);
|
||||
REGISTER_COMMON(loop);
|
||||
REGISTER_COMMON(prior_box);
|
||||
}
|
||||
|
||||
} // namespace common
|
||||
} // namespace cldnn
|
35
inference-engine/thirdparty/clDNN/src/impls/common/register.hpp
vendored
Normal file
35
inference-engine/thirdparty/clDNN/src/impls/common/register.hpp
vendored
Normal file
@ -0,0 +1,35 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cldnn/primitives/condition.hpp"
|
||||
#include "cldnn/primitives/loop.hpp"
|
||||
#include "cldnn/primitives/data.hpp"
|
||||
#include "cldnn/primitives/input_layout.hpp"
|
||||
#include "cldnn/primitives/prior_box.hpp"
|
||||
|
||||
|
||||
namespace cldnn {
|
||||
namespace common {
|
||||
void register_implementations();
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Declares the struct attach_<primitive_name>_common with a default constructor.
// Only the declaration is emitted here; the constructor body is defined elsewhere.
#define REGISTER_COMMON(primitive_name)          \
    struct attach_##primitive_name##_common {    \
        attach_##primitive_name##_common();      \
    }

REGISTER_COMMON(condition);
REGISTER_COMMON(data);
REGISTER_COMMON(input_layout);
REGISTER_COMMON(loop);
REGISTER_COMMON(prior_box);

// The helper macro is local to this header and is not left defined for includers.
#undef REGISTER_COMMON
|
||||
|
||||
} // namespace detail
|
||||
} // namespace common
|
||||
} // namespace cldnn
|
@ -6,21 +6,20 @@
|
||||
#include "data_inst.h"
|
||||
#include "prior_box_inst.h"
|
||||
#include "input_layout_inst.h"
|
||||
#include "implementation_map.h"
|
||||
#include "register_gpu.hpp"
|
||||
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "register.hpp"
|
||||
#include "network_impl.h"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace common {
|
||||
|
||||
class wait_for_events_gpu : public primitive_impl {
|
||||
class wait_for_events_impl : public primitive_impl {
|
||||
public:
|
||||
explicit wait_for_events_gpu(const program_node& /*node*/) {}
|
||||
explicit wait_for_events_impl(const program_node& /*node*/) {}
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<wait_for_events_gpu>(*this);
|
||||
return make_unique<wait_for_events_impl>(*this);
|
||||
}
|
||||
|
||||
void init_kernels() override {}
|
||||
@ -33,32 +32,32 @@ public:
|
||||
|
||||
bool validate(const primitive_inst&) const override { return true; }
|
||||
|
||||
static primitive_impl* create_data(const data_node& data) { return new wait_for_events_gpu(data); }
|
||||
static primitive_impl* create_data(const data_node& data) { return new wait_for_events_impl(data); }
|
||||
|
||||
static primitive_impl* create_input_layout(const input_layout_node& input) {
|
||||
return new wait_for_events_gpu(input);
|
||||
return new wait_for_events_impl(input);
|
||||
}
|
||||
|
||||
static primitive_impl* create_prior_box(const prior_box_node& prior_box) {
|
||||
// This primitive is being executed on CPU during network compilation.
|
||||
return new wait_for_events_gpu(prior_box);
|
||||
return new wait_for_events_impl(prior_box);
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_data_gpu::attach_data_gpu() {
|
||||
implementation_map<data>::add({ {engine_types::ocl, wait_for_events_gpu::create_data} });
|
||||
attach_data_common::attach_data_common() {
|
||||
implementation_map<data>::add(impl_types::common, wait_for_events_impl::create_data, {});
|
||||
}
|
||||
|
||||
attach_input_layout_gpu::attach_input_layout_gpu() {
|
||||
implementation_map<input_layout>::add({{engine_types::ocl, wait_for_events_gpu::create_input_layout}});
|
||||
attach_input_layout_common::attach_input_layout_common() {
|
||||
implementation_map<input_layout>::add(impl_types::common, wait_for_events_impl::create_input_layout, {});
|
||||
}
|
||||
|
||||
attach_prior_box_gpu::attach_prior_box_gpu() {
|
||||
implementation_map<prior_box>::add({{engine_types::ocl, wait_for_events_gpu::create_prior_box}});
|
||||
attach_prior_box_common::attach_prior_box_common() {
|
||||
implementation_map<prior_box>::add(impl_types::common, wait_for_events_impl::create_prior_box, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace common
|
||||
} // namespace cldnn
|
@ -4,9 +4,9 @@
|
||||
|
||||
#include "detection_output_inst.h"
|
||||
#include "network_impl.h"
|
||||
#include "implementation_map.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "math_utils.h"
|
||||
#include "register_gpu.hpp"
|
||||
#include "register.hpp"
|
||||
#include "cpu_impl_helpers.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
@ -24,7 +24,7 @@
|
||||
#endif
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace cpu {
|
||||
|
||||
namespace {
|
||||
using bounding_box = cldnn::cpu::bounding_box;
|
||||
@ -43,15 +43,15 @@ bool comp_score_descend<std::pair<int, int>>(const std::pair<float, std::pair<in
|
||||
}
|
||||
|
||||
/************************ Detection Output CPU ************************/
|
||||
struct detection_output_cpu : typed_primitive_impl<detection_output> {
|
||||
struct detection_output_impl : typed_primitive_impl<detection_output> {
|
||||
enum NMSType {CAFFE, MXNET};
|
||||
const detection_output_node& outer;
|
||||
NMSType nms_type;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<detection_output_cpu>(*this);
|
||||
return make_unique<detection_output_impl>(*this);
|
||||
}
|
||||
explicit detection_output_cpu(const detection_output_node& outer)
|
||||
explicit detection_output_impl(const detection_output_node& outer)
|
||||
: outer(outer)
|
||||
, nms_type(outer.get_primitive()->decrease_label_id ? MXNET : CAFFE) {}
|
||||
|
||||
@ -822,17 +822,19 @@ struct detection_output_cpu : typed_primitive_impl<detection_output> {
|
||||
|
||||
void init_kernels() override {}
|
||||
|
||||
static primitive_impl* create(const detection_output_node& arg) { return new detection_output_cpu(arg); }
|
||||
static primitive_impl* create(const detection_output_node& arg) { return new detection_output_impl(arg); }
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_detection_output_gpu::attach_detection_output_gpu() {
|
||||
implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_cpu::create);
|
||||
implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_cpu::create);
|
||||
attach_detection_output_impl::attach_detection_output_impl() {
|
||||
implementation_map<detection_output>::add(impl_types::cpu, detection_output_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx)
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
@ -5,7 +5,7 @@
|
||||
#include "non_max_suppression_inst.h"
|
||||
#include "primitive_inst.h"
|
||||
#include "network_impl.h"
|
||||
#include "register_gpu.hpp"
|
||||
#include "register.hpp"
|
||||
#include "cpu_impl_helpers.hpp"
|
||||
|
||||
#include <vector>
|
||||
@ -15,7 +15,7 @@
|
||||
#include <tuple>
|
||||
|
||||
namespace cldnn {
|
||||
namespace {
|
||||
namespace cpu {
|
||||
|
||||
using namespace cldnn::cpu;
|
||||
|
||||
@ -372,14 +372,14 @@ void run(non_max_suppression_inst& instance) {
|
||||
store_result(stream, instance.output_memory_ptr(), result);
|
||||
}
|
||||
|
||||
struct non_max_suppression_cpu : typed_primitive_impl<non_max_suppression> {
|
||||
struct non_max_suppression_impl : typed_primitive_impl<non_max_suppression> {
|
||||
using parent = typed_primitive_impl<non_max_suppression>;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<non_max_suppression_cpu>(*this);
|
||||
return make_unique<non_max_suppression_impl>(*this);
|
||||
}
|
||||
|
||||
non_max_suppression_cpu() : parent(kernel_selector::weights_reorder_params(), "non_max_suppression_cpu") {}
|
||||
non_max_suppression_impl() : parent(kernel_selector::weights_reorder_params(), "non_max_suppression_impl") {}
|
||||
|
||||
virtual event::ptr execute_impl(const std::vector<event::ptr>& event, typed_primitive_inst<non_max_suppression>& instance) {
|
||||
for (auto e : event) {
|
||||
@ -396,23 +396,20 @@ struct non_max_suppression_cpu : typed_primitive_impl<non_max_suppression> {
|
||||
}
|
||||
|
||||
static primitive_impl* create(const non_max_suppression_node&) {
|
||||
return new non_max_suppression_cpu();
|
||||
return new non_max_suppression_impl();
|
||||
}
|
||||
void init_kernels() override {}
|
||||
};
|
||||
} // namespace
|
||||
|
||||
namespace gpu {
|
||||
namespace detail {
|
||||
|
||||
attach_non_max_suppression_gpu::attach_non_max_suppression_gpu() {
|
||||
implementation_map<non_max_suppression>::add({
|
||||
{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), non_max_suppression_cpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), non_max_suppression_cpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), non_max_suppression_cpu::create}
|
||||
attach_non_max_suppression_impl::attach_non_max_suppression_impl() {
|
||||
implementation_map<non_max_suppression>::add(impl_types::cpu, non_max_suppression_impl::create, {
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
@ -4,10 +4,10 @@
|
||||
|
||||
#include "proposal_inst.h"
|
||||
#include "cldnn/runtime/engine.hpp"
|
||||
#include "implementation_map.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "network_impl.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "register_gpu.hpp"
|
||||
#include "register.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
@ -17,7 +17,7 @@
|
||||
#define EPSILON 0.00001f
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace cpu {
|
||||
|
||||
namespace {
|
||||
|
||||
@ -190,13 +190,13 @@ struct im_info_t {
|
||||
int min_bbox_y;
|
||||
};
|
||||
|
||||
struct proposal_gpu : typed_primitive_impl<proposal> {
|
||||
struct proposal_impl : typed_primitive_impl<proposal> {
|
||||
const proposal_node& outer;
|
||||
|
||||
explicit proposal_gpu(const proposal_node& arg) : outer(arg) {}
|
||||
explicit proposal_impl(const proposal_node& arg) : outer(arg) {}
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<proposal_gpu>(*this);
|
||||
return make_unique<proposal_impl>(*this);
|
||||
}
|
||||
|
||||
template <typename dtype>
|
||||
@ -442,19 +442,19 @@ struct proposal_gpu : typed_primitive_impl<proposal> {
|
||||
CLDNN_ERROR_MESSAGE(arg.id(), "image_info must have either 3, 4 or 6 items");
|
||||
}
|
||||
|
||||
return new proposal_gpu(arg);
|
||||
return new proposal_impl(arg);
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_proposal_gpu::attach_proposal_gpu() {
|
||||
implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
proposal_gpu::create);
|
||||
implementation_map<proposal>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
|
||||
proposal_gpu::create);
|
||||
attach_proposal_impl::attach_proposal_impl() {
|
||||
implementation_map<proposal>::add(impl_types::cpu, proposal_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx)
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
20
inference-engine/thirdparty/clDNN/src/impls/cpu/register.cpp
vendored
Normal file
20
inference-engine/thirdparty/clDNN/src/impls/cpu/register.cpp
vendored
Normal file
@ -0,0 +1,20 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "register.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace cpu {
|
||||
|
||||
#define REGISTER_CPU(prim) \
|
||||
static detail::attach_##prim##_impl attach_##prim
|
||||
|
||||
void register_implementations() {
|
||||
REGISTER_CPU(detection_output);
|
||||
REGISTER_CPU(proposal);
|
||||
REGISTER_CPU(non_max_suppression);
|
||||
}
|
||||
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
31
inference-engine/thirdparty/clDNN/src/impls/cpu/register.hpp
vendored
Normal file
31
inference-engine/thirdparty/clDNN/src/impls/cpu/register.hpp
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cldnn/primitives/detection_output.hpp"
|
||||
#include "cldnn/primitives/proposal.hpp"
|
||||
#include "cldnn/primitives/non_max_suppression.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace cpu {
|
||||
void register_implementations();
|
||||
|
||||
namespace detail {
|
||||
|
||||
|
||||
// Declares `struct attach_<primitive_name>_impl` with a default constructor that is only
// declared here; the definition lives next to the corresponding primitive implementation.
#define REGISTER_CPU(primitive_name)        \
    struct attach_##primitive_name##_impl { \
        attach_##primitive_name##_impl();   \
    }

REGISTER_CPU(proposal);
REGISTER_CPU(non_max_suppression);
REGISTER_CPU(detection_output);

// The macro is an implementation detail of this header.
#undef REGISTER_CPU
|
||||
|
||||
} // namespace detail
|
||||
} // namespace cpu
|
||||
} // namespace cldnn
|
188
inference-engine/thirdparty/clDNN/src/impls/implementation_map.hpp
vendored
Normal file
188
inference-engine/thirdparty/clDNN/src/impls/implementation_map.hpp
vendored
Normal file
@ -0,0 +1,188 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
#include <functional>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <tuple>
#include <typeinfo>
|
||||
|
||||
namespace cldnn {
|
||||
|
||||
/// @brief std::map of which exactly one instance exists per <Key, Value> combination.
///
/// The constructor is private and copying is deleted, so the only way to obtain the map is
/// through instance(), which always returns the same function-local static object.
template <typename Key, typename Value>
class singleton_map : public std::map<Key, Value> {
    using base = std::map<Key, Value>;

    singleton_map() : base() {}

public:
    singleton_map(const singleton_map&) = delete;
    void operator=(const singleton_map&) = delete;

    /// @brief Returns the single shared map for this <Key, Value> combination.
    static singleton_map& instance() {
        static singleton_map the_map;
        return the_map;
    }
};
|
||||
|
||||
|
||||
struct permute;
|
||||
struct reorder;
|
||||
struct custom_gpu_primitive;
|
||||
struct generic_layer;
|
||||
struct reshape;
|
||||
struct data;
|
||||
struct mutable_data;
|
||||
struct input_layout;
|
||||
struct prior_box;
|
||||
struct loop;
|
||||
|
||||
struct primitive_impl;
|
||||
|
||||
template <class PType>
|
||||
struct typed_program_node;
|
||||
|
||||
template <typename primitive_kind>
|
||||
struct implementation_key {
|
||||
typedef std::tuple<data_types, format::type> type;
|
||||
type operator()(const typed_program_node<primitive_kind>& primitive) {
|
||||
return std::make_tuple(primitive.get_dependency(0).get_output_layout().data_type,
|
||||
primitive.get_dependency(0).get_output_layout().format);
|
||||
}
|
||||
type operator()(const layout& proposed_layout) {
|
||||
return std::make_tuple(proposed_layout.data_type, proposed_layout.format);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<permute> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<permute>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<reorder> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<reorder>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<generic_layer> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<generic_layer>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<custom_gpu_primitive> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<custom_gpu_primitive>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<reshape> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<reshape>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<data> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<data>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<mutable_data> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<mutable_data>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<input_layout> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<input_layout>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<prior_box> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<prior_box>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <>
|
||||
struct implementation_key<loop> {
|
||||
typedef int32_t type;
|
||||
type operator()(const typed_program_node<loop>&) { return -1; }
|
||||
type operator()(const layout&) { return -1; }
|
||||
};
|
||||
|
||||
template <typename primitive_kind>
|
||||
class implementation_map {
|
||||
public:
|
||||
using key_builder = implementation_key<primitive_kind>;
|
||||
using key_type = typename key_builder::type;
|
||||
using factory_type = std::function<primitive_impl*(const typed_program_node<primitive_kind>&)>;
|
||||
using map_type = singleton_map<impl_types, std::pair<std::set<key_type>, factory_type>>;
|
||||
|
||||
static factory_type get(const typed_program_node<primitive_kind>& primitive) {
|
||||
impl_types target_impl_type = primitive.get_preferred_impl_type();
|
||||
// lookup in database; throw if not found
|
||||
auto key = key_builder()(primitive);
|
||||
for (auto& kv : map_type::instance()) {
|
||||
impl_types impl_type = kv.first;
|
||||
if ((target_impl_type & impl_type) != impl_type)
|
||||
continue;
|
||||
|
||||
std::set<key_type>& keys_set = kv.second.first;
|
||||
auto& factory = kv.second.second;
|
||||
if (keys_set.empty() || keys_set.find(key) != keys_set.end()) {
|
||||
return factory;
|
||||
}
|
||||
}
|
||||
throw std::runtime_error(std::string("implementation_map for ") + typeid(primitive_kind).name() +
|
||||
" could not find any implementation to match key");
|
||||
}
|
||||
|
||||
// check if for a given engine and type there exist an implementation
|
||||
static bool check(const typed_program_node<primitive_kind>& primitive) {
|
||||
impl_types target_impl_type = primitive.get_preferred_impl_type();
|
||||
auto key = key_builder()(primitive);
|
||||
return check_key(target_impl_type, key);
|
||||
}
|
||||
|
||||
// check if there exists a kernel implementation of a primitive with output set it primitive's output layout
|
||||
static bool check_io_eq(const typed_program_node<primitive_kind>& primitive) {
|
||||
impl_types target_impl_type = primitive.get_preferred_impl_type();
|
||||
auto key = key_builder()(primitive.get_output_layout());
|
||||
return check_key(target_impl_type, key);
|
||||
}
|
||||
|
||||
static bool check_key(impl_types target_impl_type, key_type key) {
|
||||
for (auto& kv : map_type::instance()) {
|
||||
impl_types impl_type = kv.first;
|
||||
if ((target_impl_type & impl_type) != impl_type)
|
||||
continue;
|
||||
std::set<key_type>& keys_set = kv.second.first;
|
||||
if (keys_set.empty())
|
||||
return true;
|
||||
return keys_set.find(key) != keys_set.end();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void add(impl_types impl_type, factory_type factory, std::set<key_type> keys) {
|
||||
if (impl_type == impl_types::any) {
|
||||
throw std::runtime_error("[CLDNN] Can't register impl with type any");
|
||||
}
|
||||
map_type::instance().insert({impl_type, {keys, factory}});
|
||||
}
|
||||
};
|
||||
} // namespace cldnn
|
123
inference-engine/thirdparty/clDNN/src/impls/ocl/activation.cpp
vendored
Normal file
123
inference-engine/thirdparty/clDNN/src/impls/ocl/activation.cpp
vendored
Normal file
@ -0,0 +1,123 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "activation_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "activation/activation_kernel_selector.h"
|
||||
#include "activation/activation_kernel_base.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// @brief OCL implementation of the activation primitive, built on typed_primitive_impl_ocl.
struct activation_impl : typed_primitive_impl_ocl<activation> {
    using parent = typed_primitive_impl_ocl<activation>;
    using parent::parent;

    /// @brief Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<activation_impl>(*this);
    }

    /// @brief Collects the kernel arguments; for a parameterized activation the slope memory
    ///        is passed in addition to the parent's arguments.
    kernel_arguments_data get_arguments(typed_primitive_inst<activation>& instance, int32_t split) const override {
        auto kernel_args = parent::get_arguments(instance, split);
        if (_outer.is_parameterized())
            kernel_args.slope = instance.slope_memory();
        return kernel_args;
    }

    /// @brief Builds kernel selector parameters for the node, picks the best kernel and wraps
    ///        it in a new activation_impl.
    /// @return Newly allocated implementation created with `new`.
    static primitive_impl* create(const activation_node& arg) {
        auto params = get_default_params<kernel_selector::activation_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());

        convert_new_activation_func(arg.get_primitive(), params.activations);

        if (arg.is_parameterized()) {
            const auto& slope_layout = arg.slope_input().get_output_layout();
            const auto& output_layout = arg.get_output_layout();
            const auto params_num =
                kernel_selector::GetActivationAdditionalParamsNumber(params.activations[0].function);

            // The slope buffer must hold params_num values for every output feature.
            CLDNN_ERROR_LESS_THAN(arg.id(),
                                  "Slope layout size count",
                                  slope_layout.size.count(),
                                  "output_layout.size.feature[0] * params_num",
                                  static_cast<size_t>(output_layout.size.feature[0] * params_num),
                                  "Error - not enough data inside additional params buffer");

            params.inputActivationParams.push_back(convert_data_tensor(slope_layout));
        }

        auto best_kernels =
            kernel_selector::activation_kernel_selector::Instance().GetBestKernels(params, optional_params);
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new activation_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers activation_impl::create as the ocl implementation of activation for the
// (data type, format) keys listed below. The keys end up in a std::set, so the listing
// order is irrelevant; they are grouped by format for readability.
attach_activation_impl::attach_activation_impl() {
    implementation_map<activation>::add(impl_types::ocl, activation_impl::create, {
        // yxfb
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::i8, format::yxfb),
        std::make_tuple(data_types::u8, format::yxfb),
        std::make_tuple(data_types::i32, format::yxfb),
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
        std::make_tuple(data_types::u8, format::byxf),
        std::make_tuple(data_types::i32, format::byxf),
        // b_fs_yx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        // b_fs_zyx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
        // bs_fs_zyx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
        // bs_fs_yx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
        // bfwzyx
        std::make_tuple(data_types::f32, format::bfwzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        std::make_tuple(data_types::i32, format::bfwzyx),
        std::make_tuple(data_types::i8, format::bfwzyx),
        std::make_tuple(data_types::u8, format::bfwzyx),
        // fs_b_yx_fsv32
        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "arg_max_min_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "arg_max_min/arg_max_min_kernel_selector.h"
|
||||
@ -12,14 +12,14 @@
|
||||
#include "kernel_runner.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct arg_max_min_gpu : typed_primitive_gpu_impl<arg_max_min> {
|
||||
using parent = typed_primitive_gpu_impl<arg_max_min>;
|
||||
struct arg_max_min_impl : typed_primitive_impl_ocl<arg_max_min> {
|
||||
using parent = typed_primitive_impl_ocl<arg_max_min>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<arg_max_min_gpu>(*this);
|
||||
return make_unique<arg_max_min_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -98,37 +98,27 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto conv = new arg_max_min_gpu(arg, best_kernels[0]);
|
||||
auto conv = new arg_max_min_impl(arg, best_kernels[0]);
|
||||
|
||||
return conv;
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_arg_max_min_gpu::attach_arg_max_min_gpu() {
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
|
||||
arg_max_min_gpu::create);
|
||||
implementation_map<arg_max_min>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
|
||||
arg_max_min_gpu::create);
|
||||
}
|
||||
|
||||
attach_arg_max_min_impl::attach_arg_max_min_impl() {
|
||||
implementation_map<arg_max_min>::add(impl_types::ocl, arg_max_min_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i8, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::i8, format::yxfb),
|
||||
});
|
||||
}
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
79
inference-engine/thirdparty/clDNN/src/impls/ocl/average_unpooling.cpp
vendored
Normal file
79
inference-engine/thirdparty/clDNN/src/impls/ocl/average_unpooling.cpp
vendored
Normal file
@ -0,0 +1,79 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "average_unpooling_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "average_unpooling/average_unpooling_kernel_selector.h"
|
||||
#include "average_unpooling/average_unpooling_kernel_base.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// @brief OCL implementation of the average_unpooling primitive, built on
///        typed_primitive_impl_ocl.
struct average_unpooling_impl : typed_primitive_impl_ocl<average_unpooling> {
    using parent = typed_primitive_impl_ocl<average_unpooling>;
    using parent::parent;

    /// @brief Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<average_unpooling_impl>(*this);
    }

protected:
    /// @brief Forwards to the parent; this primitive adds no extra kernel arguments.
    kernel_arguments_data get_arguments(typed_primitive_inst<average_unpooling>& instance, int32_t split) const override {
        return parent::get_arguments(instance, split);
    }

public:
    /// @brief Builds kernel selector parameters for the node (unpool size and stride taken
    ///        from the first two spatial dimensions), picks the best kernel and wraps it in
    ///        a new average_unpooling_impl.
    /// @return Newly allocated implementation created with `new`.
    static primitive_impl* create(const average_unpooling_node& arg) {
        auto params = get_default_params<kernel_selector::average_unpooling_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::average_unpooling_optional_params>(arg.get_program());

        auto prim = arg.get_primitive();
        const auto& stride = prim->stride;

        params.unpoolSize = {
            static_cast<uint32_t>(prim->size.spatial[0]),
            static_cast<uint32_t>(prim->size.spatial[1]),
        };
        params.unpoolStride = {
            static_cast<uint32_t>(stride.spatial[0]),
            static_cast<uint32_t>(stride.spatial[1]),
        };

        auto best_kernels =
            kernel_selector::average_unpooling_kernel_selector::Instance().GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new average_unpooling_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers average_unpooling_impl::create as the ocl implementation of average_unpooling.
// The keys end up in a std::set, so the listing order is irrelevant; they are grouped by
// format for readability.
attach_average_unpooling_impl::attach_average_unpooling_impl() {
    implementation_map<average_unpooling>::add(impl_types::ocl, average_unpooling_impl::create, {
        // yxfb
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::i8, format::yxfb),
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
78
inference-engine/thirdparty/clDNN/src/impls/ocl/batch_to_space.cpp
vendored
Normal file
78
inference-engine/thirdparty/clDNN/src/impls/ocl/batch_to_space.cpp
vendored
Normal file
@ -0,0 +1,78 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "batch_to_space_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "batch_to_space/batch_to_space_kernel_selector.h"
|
||||
#include "batch_to_space/batch_to_space_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "data_inst.h"
|
||||
#include <vector>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
/// @brief OCL implementation of the batch_to_space primitive, built on
///        typed_primitive_impl_ocl.
struct batch_to_space_impl : typed_primitive_impl_ocl<batch_to_space> {
    using parent = typed_primitive_impl_ocl<batch_to_space>;
    using parent::parent;

    /// @brief Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<batch_to_space_impl>(*this);
    }

public:
    /// @brief Builds kernel selector parameters for the node (block shape and crops copied
    ///        from the primitive), picks the best kernel and wraps it in a new
    ///        batch_to_space_impl.
    /// @return Newly allocated implementation created with `new`.
    static primitive_impl* create(const batch_to_space_node& arg) {
        auto params = get_default_params<kernel_selector::batch_to_space_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::batch_to_space_optional_params>(arg.get_program());

        auto prim = arg.get_primitive();
        params.block_shape = convert_dim_vector(prim->block_shape);
        params.crops_begin = convert_dim_vector(prim->crops_begin);
        params.crops_end = convert_dim_vector(prim->crops_end);

        auto best_kernels =
            kernel_selector::batch_to_space_kernel_selector::Instance().GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new batch_to_space_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers batch_to_space_impl::create as the ocl implementation of batch_to_space.
// The keys end up in a std::set, so the listing order is irrelevant; they are grouped by
// data type, each with the same four formats.
attach_batch_to_space_impl::attach_batch_to_space_impl() {
    implementation_map<batch_to_space>::add(impl_types::ocl, batch_to_space_impl::create, {
        // f32
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f32, format::bfwzyx),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        // f16
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        // u8
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfwzyx),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        // i8
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfwzyx),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -5,8 +5,8 @@
|
||||
#include "cldnn/primitives/scale.hpp"
|
||||
#include "cldnn/primitives/quantize.hpp"
|
||||
#include "binary_convolution_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "kernel_runner.h"
|
||||
@ -16,14 +16,14 @@
|
||||
#include <memory>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct binary_convolution_gpu : typed_primitive_gpu_impl<binary_convolution> {
|
||||
using parent = typed_primitive_gpu_impl<binary_convolution>;
|
||||
struct binary_convolution_impl : typed_primitive_impl_ocl<binary_convolution> {
|
||||
using parent = typed_primitive_impl_ocl<binary_convolution>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<binary_convolution_gpu>(*this);
|
||||
return make_unique<binary_convolution_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -125,7 +125,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto conv = new binary_convolution_gpu(arg, best_kernels[0]);
|
||||
auto conv = new binary_convolution_impl(arg, best_kernels[0]);
|
||||
|
||||
return conv;
|
||||
}
|
||||
@ -133,12 +133,12 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_binary_convolution_gpu::attach_binary_convolution_gpu() {
|
||||
implementation_map<binary_convolution>::add(
|
||||
std::make_tuple(engine_types::ocl, data_types::bin, format::b_fs_yx_32fp),
|
||||
binary_convolution_gpu::create);
|
||||
// Registers binary_convolution_impl::create as the ocl implementation for the
// single supported key: binary data in b_fs_yx_32fp format.
attach_binary_convolution_impl::attach_binary_convolution_impl() {
    using registry = implementation_map<binary_convolution>;
    const auto factory = binary_convolution_impl::create;

    registry::add(impl_types::ocl, factory, {
        std::make_tuple(data_types::bin, format::b_fs_yx_32fp),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
96
inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
vendored
Normal file
96
inference-engine/thirdparty/clDNN/src/impls/ocl/border.cpp
vendored
Normal file
@ -0,0 +1,96 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "border_inst.h"
|
||||
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "border/border_kernel_selector.h"
|
||||
#include "border/border_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// @brief ocl implementation wrapper for the border primitive.
struct border_impl : typed_primitive_impl_ocl<border> {
    using parent = typed_primitive_impl_ocl<border>;
    using parent::parent;

    /// @brief Produces a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<border_impl>(*this);
    }

    /// @brief Translates the node's primitive description into kernel-selector
    ///        parameters, asks the border kernel selector for candidates and
    ///        wraps the first one in a new border_impl.
    /// @param arg Program node describing the border primitive.
    /// @return Newly allocated implementation; ownership passes to the caller.
    static primitive_impl* create(const border_node& arg) {
        auto params = get_default_params<kernel_selector::border_params>(arg, 1);
        auto optional_params =
            get_default_optional_params<kernel_selector::border_optional_params>(arg.get_program());

        const auto prim = arg.get_primitive();

        params.lt_sizes = convert_dim_vector(prim->left_top_sizes);
        params.rb_sizes = convert_dim_vector(prim->right_bottom_sizes);
        params.border_value = prim->border_value;

        // Map the API-level border type onto the kernel selector enumeration.
        // An unknown value only trips the assert; b_type is then left untouched.
        const auto type = prim->type;
        if (type == border_type::constant) {
            params.b_type = kernel_selector::border_type::CONSTANT;
        } else if (type == border_type::edge) {
            params.b_type = kernel_selector::border_type::EDGE;
        } else if (type == border_type::mirror) {
            params.b_type = kernel_selector::border_type::MIRROR;
        } else if (type == border_type::mirror_101) {
            params.b_type = kernel_selector::border_type::MIRROR_101;
        } else {
            assert(
                false &&
                "Encountered unhandled enum case: border_type during translation to kernel selector enumeration.");
        }

        auto& selector = kernel_selector::border_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new border_impl(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers border_impl::create as the ocl implementation for each
// (data type, format) pair listed below.
attach_border_impl::attach_border_impl() {
    using registry = implementation_map<border>;
    const auto factory = border_impl::create;

    registry::add(impl_types::ocl, factory, {
        // yxfb
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::i8, format::yxfb),
        std::make_tuple(data_types::u8, format::yxfb),
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
        std::make_tuple(data_types::u8, format::byxf),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        // bfwzyx
        std::make_tuple(data_types::f32, format::bfwzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        std::make_tuple(data_types::i8, format::bfwzyx),
        std::make_tuple(data_types::u8, format::bfwzyx),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -4,22 +4,22 @@
|
||||
|
||||
#include "broadcast_inst.h"
|
||||
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "broadcast/broadcast_kernel_selector.h"
|
||||
#include "broadcast/broadcast_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct broadcast_gpu : typed_primitive_gpu_impl<broadcast> {
|
||||
using parent = typed_primitive_gpu_impl<broadcast>;
|
||||
struct broadcast_impl : typed_primitive_impl_ocl<broadcast> {
|
||||
using parent = typed_primitive_impl_ocl<broadcast>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<broadcast_gpu>(*this);
|
||||
return make_unique<broadcast_impl>(*this);
|
||||
}
|
||||
|
||||
static primitive_impl* create(const broadcast_node& arg) {
|
||||
@ -57,29 +57,29 @@ struct broadcast_gpu : typed_primitive_gpu_impl<broadcast> {
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
return new broadcast_gpu(arg, best_kernels[0]);
|
||||
return new broadcast_impl(arg, best_kernels[0]);
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_broadcast_gpu::attach_broadcast_gpu() {
|
||||
auto val_fw = broadcast_gpu::create;
|
||||
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
|
||||
implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfzyx), val_fw);
|
||||
// Registers broadcast_impl::create as the ocl implementation for each
// (data type, format) pair listed below.
attach_broadcast_impl::attach_broadcast_impl() {
    using registry = implementation_map<broadcast>;
    const auto factory = broadcast_impl::create;

    registry::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::i64, format::bfyx),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        std::make_tuple(data_types::i64, format::bfzyx),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
159
inference-engine/thirdparty/clDNN/src/impls/ocl/concatenation.cpp
vendored
Normal file
159
inference-engine/thirdparty/clDNN/src/impls/ocl/concatenation.cpp
vendored
Normal file
@ -0,0 +1,159 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "concatenation_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "concatenation/concatenation_kernel_selector.h"
|
||||
#include "concatenation/concatenation_kernel_base.h"
|
||||
|
||||
#include <initializer_list>
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
namespace {
|
||||
// Maps a concatenation primitive axis onto the kernel selector's axis enum.
// along_x and any value not listed below both map to concat_axis::X.
kernel_selector::concat_axis convert_axis(concatenation::concatenation_axis axis) {
    auto converted = kernel_selector::concat_axis::X;

    switch (axis) {
        case concatenation::along_y:
            converted = kernel_selector::concat_axis::Y;
            break;
        case concatenation::along_z:
            converted = kernel_selector::concat_axis::Z;
            break;
        case concatenation::along_w:
            converted = kernel_selector::concat_axis::W;
            break;
        case concatenation::along_f:
            converted = kernel_selector::concat_axis::FEATURE;
            break;
        case concatenation::along_b:
            converted = kernel_selector::concat_axis::BATCH;
            break;
        case concatenation::along_x:
        default:
            break;
    }

    return converted;
}
|
||||
} // namespace
|
||||
|
||||
/// @brief ocl implementation wrapper for the concatenation primitive.
struct concatenation_impl : typed_primitive_impl_ocl<concatenation> {
    using parent = typed_primitive_impl_ocl<concatenation>;

    /// @brief Produces a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<concatenation_impl>(*this);
    }

    /// @brief Stores the node and kernel data in the base class; unless the node
    ///        can be optimized, checks that the number of kernels equals the
    ///        number of inputs.
    concatenation_impl(const concatenation_node& arg, const kernel_selector::kernel_data& kd) : parent(arg, kd) {
        if (_outer.can_be_optimized()) {
            return;
        }

        CLDNN_ERROR_NOT_EQUAL(_outer.id(),
                              "Input count",
                              _outer.inputs_count(),
                              "kds size",
                              kd.kernels.size(),
                              "Error - not enough kernels for concatenation");
    }

protected:
    /// @brief True when the base class reports the instance as optimized out or
    ///        the node itself can be optimized.
    bool optimized_out(concatenation_inst& instance) const override {
        if (parent::optimized_out(instance)) {
            return true;
        }
        return _outer.can_be_optimized();
    }

public:
    /// @brief Creates the implementation for a concatenation node.
    /// @param arg Program node describing the concatenation primitive.
    /// @return Newly allocated implementation; ownership passes to the caller.
    static primitive_impl* create(const concatenation_node& arg) {
        // An optimizable node gets an implementation with empty kernel data.
        if (arg.can_be_optimized()) {
            return new concatenation_impl(arg, {});
        }

        auto params = get_default_params<kernel_selector::concatenation_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::concatenation_optional_params>(arg.get_program());
        auto axis = arg.get_primitive()->axis;

        // Replace the default inputs with one data tensor per node input.
        params.inputs.resize(arg.inputs_count());
        for (size_t idx = 0; idx < arg.inputs_count(); ++idx) {
            params.inputs[idx] = convert_data_tensor(arg.input(idx).get_output_layout());
        }

        params.axis = convert_axis(axis);
        optional_params.kernelPerInput = true;

        auto& selector = kernel_selector::concatenation_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new concatenation_impl(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers concatenation_impl::create as the ocl implementation for each
// (data type, format) pair listed below.
attach_concatenation_impl::attach_concatenation_impl() {
    using registry = implementation_map<concatenation>;
    const auto factory = concatenation_impl::create;

    registry::add(impl_types::ocl, factory, {
        // yxfb
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::i8, format::yxfb),
        std::make_tuple(data_types::u8, format::yxfb),
        std::make_tuple(data_types::i32, format::yxfb),
        std::make_tuple(data_types::i64, format::yxfb),
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::i64, format::bfyx),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
        std::make_tuple(data_types::u8, format::byxf),
        std::make_tuple(data_types::i32, format::byxf),
        std::make_tuple(data_types::i64, format::byxf),
        // fyxb
        std::make_tuple(data_types::f32, format::fyxb),
        std::make_tuple(data_types::f16, format::fyxb),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        std::make_tuple(data_types::i64, format::bfzyx),
        // b_fs_zyx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
        // bs_fs_zyx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
        // bs_fs_yx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
        // b_fs_yx_fsv16
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
        // b_fs_yx_fsv4
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
        // b_fs_yx_fsv32
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
        // bfwzyx
        std::make_tuple(data_types::f32, format::bfwzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        std::make_tuple(data_types::u8, format::bfwzyx),
        std::make_tuple(data_types::i8, format::bfwzyx),
        std::make_tuple(data_types::i32, format::bfwzyx),
        std::make_tuple(data_types::i64, format::bfwzyx),
        // fs_b_yx_fsv32
        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -4,8 +4,8 @@
|
||||
|
||||
#include "convolution_inst.h"
|
||||
#include "eltwise_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "kernel_runner.h"
|
||||
@ -15,14 +15,14 @@
|
||||
#include <memory>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct convolution_gpu : typed_primitive_gpu_impl<convolution> {
|
||||
using parent = typed_primitive_gpu_impl<convolution>;
|
||||
struct convolution_impl : typed_primitive_impl_ocl<convolution> {
|
||||
using parent = typed_primitive_impl_ocl<convolution>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<convolution_gpu>(*this);
|
||||
return make_unique<convolution_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -151,7 +151,7 @@ public:
|
||||
"Best_kernel.empty()",
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with these arguments");
|
||||
auto conv = new convolution_gpu(arg, best_kernels[0]);
|
||||
auto conv = new convolution_impl(arg, best_kernels[0]);
|
||||
|
||||
return conv;
|
||||
}
|
||||
@ -159,55 +159,49 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_convolution_gpu::attach_convolution_gpu() {
|
||||
auto val_fw = convolution_gpu::create;
|
||||
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::winograd_2x3_s1_data), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::winograd_2x3_s1_data), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
|
||||
// block f16 format
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
|
||||
// block i8 format
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), val_fw);
|
||||
// MMAD
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
|
||||
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw);
|
||||
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), val_fw);
|
||||
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw);
|
||||
implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw);
|
||||
// Registers convolution_impl::create as the ocl implementation for each
// (data type, format) pair listed below.
//
// The original list named (i8, b_fs_yx_fsv4) and (u8, b_fs_yx_fsv4) twice;
// each key is now listed exactly once.
attach_convolution_impl::attach_convolution_impl() {
    implementation_map<convolution>::add(impl_types::ocl, convolution_impl::create, {
        // yxfb
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        // winograd_2x3_s1_data
        std::make_tuple(data_types::f32, format::winograd_2x3_s1_data),
        std::make_tuple(data_types::f16, format::winograd_2x3_s1_data),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
        // block f16 format
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        // block i8 format
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
        // MMAD
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
        // b_fs_yx_fsv32
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
        // b_fs_zyx_fsv32
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
        // fs_b_yx_fsv32
        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
        // b_fs_zyx_fsv16 (floating point)
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
        // bs_fs_zyx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
        // bs_fs_yx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
118
inference-engine/thirdparty/clDNN/src/impls/ocl/crop.cpp
vendored
Normal file
118
inference-engine/thirdparty/clDNN/src/impls/ocl/crop.cpp
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "crop_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "eltwise/eltwise_kernel_selector.h"
|
||||
#include "eltwise/eltwise_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// @brief ocl implementation wrapper for the crop primitive, built on an
///        eltwise kernel with a single ASSIGN operation.
struct crop_impl : typed_primitive_impl_ocl<crop> {
    using parent = typed_primitive_impl_ocl<crop>;
    using parent::parent;

    /// @brief Produces a copy of this implementation object.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<crop_impl>(*this);
    }

protected:
    /// @brief True when the base class reports the instance as optimized out or
    ///        the node itself can be optimized.
    bool optimized_out(crop_inst& instance) const override {
        if (parent::optimized_out(instance)) {
            return true;
        }
        return _outer.can_be_optimized();
    }

public:
    /// @brief Creates the implementation for a crop node.
    /// @param arg Program node describing the crop primitive.
    /// @return Newly allocated implementation; ownership passes to the caller.
    static primitive_impl* create(const crop_node& arg) {
        auto params = get_default_params<kernel_selector::eltwise_params>(arg, 1);
        auto optional_params =
            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());

        // Single eltwise operation: ASSIGN from input buffer 0.
        params.operations.push_back(
            {{kernel_selector::eltwise_params::InputType::Buffer(0)}, kernel_selector::eltwise_mode::ASSIGN});

        // Input 0 is rebuilt from the input layout with the primitive's offsets passed along.
        const auto& in_layout = arg.input().get_output_layout();
        params.inputs[0] = convert_data_tensor(in_layout, 1, arg.get_primitive()->offsets);

        auto& selector = kernel_selector::eltwise_kernel_selector::Instance();
        auto candidates = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         candidates.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new crop_impl(arg, candidates[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers crop_impl::create as the ocl implementation for each
// (data type, format) pair listed below.
attach_crop_impl::attach_crop_impl() {
    using registry = implementation_map<crop>;
    const auto factory = crop_impl::create;

    registry::add(impl_types::ocl, factory, {
        // yxfb
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::i64, format::yxfb),
        std::make_tuple(data_types::i32, format::yxfb),
        std::make_tuple(data_types::i8, format::yxfb),
        std::make_tuple(data_types::u8, format::yxfb),
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i64, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i64, format::byxf),
        std::make_tuple(data_types::i32, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
        std::make_tuple(data_types::u8, format::byxf),
        // fyxb
        std::make_tuple(data_types::f32, format::fyxb),
        std::make_tuple(data_types::f16, format::fyxb),
        std::make_tuple(data_types::i64, format::fyxb),
        std::make_tuple(data_types::i32, format::fyxb),
        std::make_tuple(data_types::i8, format::fyxb),
        std::make_tuple(data_types::u8, format::fyxb),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i64, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        // bfwzyx
        std::make_tuple(data_types::f32, format::bfwzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        std::make_tuple(data_types::i64, format::bfwzyx),
        std::make_tuple(data_types::i32, format::bfwzyx),
        std::make_tuple(data_types::i8, format::bfwzyx),
        std::make_tuple(data_types::u8, format::bfwzyx),
        // b_fs_yx_fsv16
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        // b_fs_zyx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
        // bs_fs_yx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
        // bs_fs_zyx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "ctc_greedy_decoder_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "ctc_greedy_decoder/ctc_greedy_decoder_kernel_selector.h"
|
||||
@ -15,14 +15,14 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct ctc_greedy_decoder_gpu : typed_primitive_gpu_impl<ctc_greedy_decoder> {
|
||||
using parent = typed_primitive_gpu_impl<ctc_greedy_decoder>;
|
||||
struct ctc_greedy_decoder_impl : typed_primitive_impl_ocl<ctc_greedy_decoder> {
|
||||
using parent = typed_primitive_impl_ocl<ctc_greedy_decoder>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<ctc_greedy_decoder_gpu>(*this);
|
||||
return make_unique<ctc_greedy_decoder_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -51,7 +51,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto grn = new ctc_greedy_decoder_gpu(arg, best_kernels[0]);
|
||||
auto grn = new ctc_greedy_decoder_impl(arg, best_kernels[0]);
|
||||
|
||||
return grn;
|
||||
}
|
||||
@ -59,13 +59,15 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_ctc_greedy_decoder_gpu::attach_ctc_greedy_decoder_gpu() {
|
||||
implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), ctc_greedy_decoder_gpu::create);
|
||||
implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), ctc_greedy_decoder_gpu::create);
|
||||
implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), ctc_greedy_decoder_gpu::create);
|
||||
implementation_map<ctc_greedy_decoder>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), ctc_greedy_decoder_gpu::create);
|
||||
// Registers ctc_greedy_decoder_impl::create in the implementation map under
// impl_types::ocl for the (data type, format) pairs listed below.
attach_ctc_greedy_decoder_impl::attach_ctc_greedy_decoder_impl() {
    const auto factory = ctc_greedy_decoder_impl::create;

    implementation_map<ctc_greedy_decoder>::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::i64, format::bfyx),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "cum_sum_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "cum_sum/cum_sum_kernel_selector.h"
|
||||
#include "cum_sum/cum_sum_kernel_ref.h"
|
||||
@ -13,7 +13,7 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
namespace {
|
||||
kernel_selector::cum_sum_axis convert_axis(cum_sum::cum_sum_axis axis) {
|
||||
@ -36,12 +36,12 @@ kernel_selector::cum_sum_axis convert_axis(cum_sum::cum_sum_axis axis) {
|
||||
}
|
||||
} // namespace
|
||||
|
||||
struct cum_sum_gpu : typed_primitive_gpu_impl<cum_sum> {
|
||||
using parent = typed_primitive_gpu_impl<cum_sum>;
|
||||
struct cum_sum_impl : typed_primitive_impl_ocl<cum_sum> {
|
||||
using parent = typed_primitive_impl_ocl<cum_sum>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<cum_sum_gpu>(*this);
|
||||
return make_unique<cum_sum_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -62,7 +62,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto cum_sum = new cum_sum_gpu(arg, best_kernels[0]);
|
||||
auto cum_sum = new cum_sum_impl(arg, best_kernels[0]);
|
||||
|
||||
return cum_sum;
|
||||
}
|
||||
@ -70,16 +70,17 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_cum_sum_gpu::attach_cum_sum_gpu() {
|
||||
auto val_fw = cum_sum_gpu::create;
|
||||
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
|
||||
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
|
||||
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
|
||||
implementation_map<cum_sum>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
|
||||
// Registers cum_sum_impl::create in the implementation map under
// impl_types::ocl for the (data type, format) pairs listed below.
attach_cum_sum_impl::attach_cum_sum_impl() {
    const auto factory = cum_sum_impl::create;

    implementation_map<cum_sum>::add(impl_types::ocl, factory, {
        // f16
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        // f32
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f32, format::bfwzyx),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -4,12 +4,12 @@
|
||||
|
||||
#include "custom_gpu_primitive_inst.h"
|
||||
#include "cldnn/runtime/engine.hpp"
|
||||
#include "implementation_map.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "network_impl.h"
|
||||
#include "jitter.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "register_gpu.hpp"
|
||||
#include "register.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
@ -17,24 +17,24 @@
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
using namespace cldnn;
|
||||
namespace kernel_selector {
|
||||
using jit_constants = kernel_selector::JitConstants;
|
||||
}
|
||||
|
||||
namespace neural {
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
struct custom_gpu_primitive_gpu : typed_primitive_impl<custom_gpu_primitive> {
|
||||
struct custom_gpu_primitive_impl : typed_primitive_impl<custom_gpu_primitive> {
|
||||
const custom_gpu_primitive_node& outer;
|
||||
std::shared_ptr<kernel_selector::cl_kernel_data> cl_kernel;
|
||||
std::vector<kernel::ptr> _kernels;
|
||||
kernel_id _kernel_id;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<custom_gpu_primitive_gpu>(*this);
|
||||
return make_unique<custom_gpu_primitive_impl>(*this);
|
||||
}
|
||||
|
||||
custom_gpu_primitive_gpu(const custom_gpu_primitive_gpu& other)
|
||||
custom_gpu_primitive_impl(const custom_gpu_primitive_impl& other)
|
||||
: outer(other.outer)
|
||||
, cl_kernel(other.cl_kernel)
|
||||
, _kernels({})
|
||||
@ -42,7 +42,7 @@ struct custom_gpu_primitive_gpu : typed_primitive_impl<custom_gpu_primitive> {
|
||||
_kernels.emplace_back(std::move(outer.get_program().get_kernel(_kernel_id)->clone()));
|
||||
}
|
||||
|
||||
custom_gpu_primitive_gpu(const custom_gpu_primitive_node& arg,
|
||||
custom_gpu_primitive_impl(const custom_gpu_primitive_node& arg,
|
||||
std::shared_ptr<kernel_selector::cl_kernel_data>& cl_kernel)
|
||||
: outer(arg)
|
||||
, cl_kernel(cl_kernel)
|
||||
@ -224,14 +224,15 @@ static primitive_impl* create(const custom_gpu_primitive_node& arg) {
|
||||
cl_kernel->params.arguments.push_back(get_arg(p));
|
||||
}
|
||||
|
||||
return new custom_gpu_primitive_gpu(arg, cl_kernel);
|
||||
}
|
||||
} // namespace neural
|
||||
|
||||
namespace cldnn { namespace gpu { namespace detail {
|
||||
|
||||
attach_custom_gpu_primitive_gpu::attach_custom_gpu_primitive_gpu() {
|
||||
implementation_map<custom_gpu_primitive>::add({{cldnn::engine_types::ocl, neural::create}});
|
||||
return new custom_gpu_primitive_impl(arg, cl_kernel);
|
||||
}
|
||||
|
||||
} } } // namespace cldnn::gpu::detail
|
||||
namespace detail {
|
||||
|
||||
// Registers the file-local `create` factory for custom_gpu_primitive under
// impl_types::ocl. The key list passed to add() is empty.
attach_custom_gpu_primitive_impl::attach_custom_gpu_primitive_impl() {
    implementation_map<custom_gpu_primitive>::add(impl_types::ocl,
                                                  create,
                                                  {});
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
148
inference-engine/thirdparty/clDNN/src/impls/ocl/deconvolution.cpp
vendored
Normal file
148
inference-engine/thirdparty/clDNN/src/impls/ocl/deconvolution.cpp
vendored
Normal file
@ -0,0 +1,148 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "deconvolution_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "deconvolution/deconvolution_kernel_selector.h"
|
||||
#include "deconvolution/deconvolution_kernel_base.h"
|
||||
#include <algorithm>
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// @brief OCL implementation of the deconvolution primitive.
///
/// create() fills kernel-selector parameters from the deconvolution node, asks
/// the deconvolution kernel selector for the best kernels and wraps the first
/// one in a deconvolution_impl.
struct deconvolution_impl : typed_primitive_impl_ocl<deconvolution> {
    using parent = typed_primitive_impl_ocl<deconvolution>;
    using parent::parent;

    /// @brief Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<deconvolution_impl>(*this);
    }

protected:
    // TODO: share it with convolution and fully connected
    /// @brief Compares the output padding filling value against 0.0f via
    ///        CLDNN_ERROR_NOT_EQUAL and then reports success.
    bool validate_impl(const typed_primitive_inst<deconvolution>&) const override {
        CLDNN_ERROR_NOT_EQUAL(_outer.id(),
                              "deconvolution filling value",
                              _outer.get_output_layout().data_padding.filling_value(),
                              "padding mode",
                              0.0f,
                              "Unknown padding mode in deconvolution.");
        return true;
    }

    /// @brief Extends the parent's kernel arguments with weights and bias.
    ///
    /// The bias argument is set to nullptr when the instance has no bias term.
    kernel_arguments_data get_arguments(typed_primitive_inst<deconvolution>& instance, int32_t split) const override {
        kernel_arguments_data kernel_args = parent::get_arguments(instance, split);

        kernel_args.weights = instance.weights_memory(split);
        kernel_args.bias = instance.bias_term() ? instance.bias_memory(split) : nullptr;

        return kernel_args;
    }

    int32_t get_split() const override {
        return _outer.get_split();
    }

    uint32_t get_groups() const override {
        return _outer.get_groups();
    }

public:
    /// @brief Builds a deconvolution_impl for the given node.
    /// @param arg Deconvolution program node to create the implementation for.
    /// @return Newly allocated implementation; ownership passes to the caller.
    static primitive_impl* create(const deconvolution_node& arg) {
        const auto& prim = arg.get_primitive();
        const auto& filter_layout = arg.weights(0).get_output_layout();
        const auto& filter_dims = filter_layout.size;

        const auto& split = prim->split();
        const auto& groups = prim->groups;
        const auto& stride = prim->stride;
        const auto& input_offset = prim->input_offset;
#if 0  // TODO: support dilation
        const auto& dilation = prim->dilation;
#else
        const tensor dilation = {0, 0, 1, 1, 1};
#endif

        // When groups are used, the default params are requested with a split of 1.
        auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(
            arg,
            (groups > 1) ? 1 : split,
            1,
            prim->grouped_weights_shape);
        auto deconv_optional_params =
            get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());

        deconv_params.split = split;
        deconv_params.groups = groups;

        // Filter size: the z extent is forced to 1 when the output has two spatial dimensions.
        const auto spatial_dims = arg.get_output_layout().format.dimension() - 2;
        const uint32_t filter_x = filter_dims.spatial[0];
        const uint32_t filter_y = filter_dims.spatial[1];
        uint32_t filter_z = 1;
        if (spatial_dims != 2) {
            filter_z = filter_dims.spatial[2];
        }
        deconv_params.filterSize = { filter_x, filter_y, filter_z };

        // Padding is the negated input offset, clamped at zero.
        deconv_params.padding = {static_cast<uint32_t>(std::max(-input_offset.spatial[0], 0)),
                                 static_cast<uint32_t>(std::max(-input_offset.spatial[1], 0)),
                                 static_cast<uint32_t>(std::max(-input_offset.spatial[2], 0))};

        deconv_params.stride = {static_cast<uint32_t>(stride.spatial[0]),
                                static_cast<uint32_t>(stride.spatial[1]),
                                static_cast<uint32_t>(stride.spatial[2])};

        deconv_params.dilation = {static_cast<uint32_t>(dilation.spatial[0]),
                                  static_cast<uint32_t>(dilation.spatial[1]),
                                  static_cast<uint32_t>(dilation.spatial[2])};

        auto& selector = kernel_selector::deconvolution_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(deconv_params, deconv_optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with these arguments");

        return new deconvolution_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers deconvolution_impl::create in the implementation map under
// impl_types::ocl for the (data type, format) pairs listed below.
attach_deconvolution_impl::attach_deconvolution_impl() {
    const auto factory = deconvolution_impl::create;

    implementation_map<deconvolution>::add(impl_types::ocl, factory, {
        // f32
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
        // f16
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        // i8 / u8
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "deformable_convolution_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "kernel_runner.h"
|
||||
@ -13,14 +13,14 @@
|
||||
#include <algorithm>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct deformable_conv_gpu : typed_primitive_gpu_impl<deformable_conv> {
|
||||
using parent = typed_primitive_gpu_impl<deformable_conv>;
|
||||
struct deformable_conv_impl : typed_primitive_impl_ocl<deformable_conv> {
|
||||
using parent = typed_primitive_impl_ocl<deformable_conv>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<deformable_conv_gpu>(*this);
|
||||
return make_unique<deformable_conv_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -71,18 +71,18 @@ public:
|
||||
"Best_kernel.empty()",
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with these arguments");
|
||||
auto conv = new deformable_conv_gpu(arg, best_kernels[0]);
|
||||
auto conv = new deformable_conv_impl(arg, best_kernels[0]);
|
||||
|
||||
return conv;
|
||||
}
|
||||
};
|
||||
|
||||
struct deformable_interp_gpu : typed_primitive_gpu_impl<deformable_interp> {
|
||||
using parent = typed_primitive_gpu_impl<deformable_interp>;
|
||||
struct deformable_interp_impl : typed_primitive_impl_ocl<deformable_interp> {
|
||||
using parent = typed_primitive_impl_ocl<deformable_interp>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<deformable_interp_gpu>(*this);
|
||||
return make_unique<deformable_interp_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -139,7 +139,7 @@ public:
|
||||
"Best_kernel.empty()",
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with these arguments");
|
||||
auto conv = new deformable_interp_gpu(arg, best_kernels[0]);
|
||||
auto conv = new deformable_interp_impl(arg, best_kernels[0]);
|
||||
|
||||
return conv;
|
||||
}
|
||||
@ -147,20 +147,20 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_deformable_conv_gpu::attach_deformable_conv_gpu() {
|
||||
implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
deformable_conv_gpu::create);
|
||||
implementation_map<deformable_conv>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
|
||||
deformable_conv_gpu::create);
|
||||
// Registers deformable_conv_impl::create in the implementation map under
// impl_types::ocl for the (data type, format) pairs listed below.
attach_deformable_conv_impl::attach_deformable_conv_impl() {
    const auto factory = deformable_conv_impl::create;

    implementation_map<deformable_conv>::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
    });
}
|
||||
|
||||
attach_deformable_interp_gpu::attach_deformable_interp_gpu() {
|
||||
implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
deformable_interp_gpu::create);
|
||||
implementation_map<deformable_interp>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
|
||||
deformable_interp_gpu::create);
|
||||
// Registers deformable_interp_impl::create in the implementation map under
// impl_types::ocl for the (data type, format) pairs listed below.
attach_deformable_interp_impl::attach_deformable_interp_impl() {
    const auto factory = deformable_interp_impl::create;

    implementation_map<deformable_interp>::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
71
inference-engine/thirdparty/clDNN/src/impls/ocl/depth_to_space.cpp
vendored
Normal file
71
inference-engine/thirdparty/clDNN/src/impls/ocl/depth_to_space.cpp
vendored
Normal file
@ -0,0 +1,71 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "depth_to_space_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "depth_to_space/depth_to_space_kernel_selector.h"
|
||||
#include "depth_to_space/depth_to_space_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "common_types.h"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
/// @brief OCL implementation of the depth_to_space primitive.
///
/// create() copies the block size and mode from the primitive into
/// kernel-selector parameters and wraps the first of the best kernels.
struct depth_to_space_impl : typed_primitive_impl_ocl<depth_to_space> {
    using parent = typed_primitive_impl_ocl<depth_to_space>;
    using parent::parent;

    /// @brief Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<depth_to_space_impl>(*this);
    }

public:
    /// @brief Builds a depth_to_space_impl for the given node.
    /// @param arg depth_to_space program node to create the implementation for.
    /// @return Newly allocated implementation; ownership passes to the caller.
    static primitive_impl* create(const depth_to_space_node& arg) {
        auto params = get_default_params<kernel_selector::depth_to_space_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());

        params.block_size = arg.get_primitive()->block_size;

        // Any mode other than blocks_first is mapped to DEPTH_FIRST.
        if (arg.get_primitive()->mode == depth_to_space_mode::blocks_first) {
            params.mode = kernel_selector::depth_to_space_mode::BLOCKS_FIRST;
        } else {
            params.mode = kernel_selector::depth_to_space_mode::DEPTH_FIRST;
        }

        auto& selector = kernel_selector::depth_to_space_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new depth_to_space_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers depth_to_space_impl::create in the implementation map under
// impl_types::ocl for the (data type, format) pairs listed below.
attach_depth_to_space_impl::attach_depth_to_space_impl() {
    const auto factory = depth_to_space_impl::create;

    implementation_map<depth_to_space>::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        // b_fs_yx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
188
inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp
vendored
Normal file
188
inference-engine/thirdparty/clDNN/src/impls/ocl/eltwise.cpp
vendored
Normal file
@ -0,0 +1,188 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "eltwise_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "eltwise/eltwise_kernel_selector.h"
|
||||
#include "eltwise/eltwise_kernel_base.h"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// @brief OCL implementation of the eltwise primitive.
///
/// create() converts the eltwise node (inputs, mode, coefficients, strides)
/// into kernel-selector parameters and wraps the first of the best kernels.
struct eltwise_impl : typed_primitive_impl_ocl<eltwise> {
    using parent = typed_primitive_impl_ocl<eltwise>;
    using parent::parent;

    /// @brief Returns a copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<eltwise_impl>(*this);
    }

protected:
    /// @brief Forwards to the parent's argument collection without changes.
    kernel_arguments_data get_arguments(typed_primitive_inst<eltwise>& instance, int32_t split) const override {
        return parent::get_arguments(instance, split);
    }

public:
    /// @brief Builds an eltwise_impl for the given node.
    /// @param arg Eltwise program node to create the implementation for.
    /// @return Newly allocated implementation; ownership passes to the caller.
    static primitive_impl* create(const eltwise_node& arg) {
        using operand = kernel_selector::eltwise_params::InputType;

        auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
        auto ew_optional_params =
            get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());

        // Append the layouts of inputs 1..N-1; the loop deliberately starts at index 1.
        for (size_t idx = 1; idx < arg.inputs_count(); idx++) {
            ew_params.inputs.push_back(convert_data_tensor(arg.input(idx).get_output_layout()));
        }

        const auto& prim = arg.get_primitive();

        // First operation combines buffers 0 and 1; every further input is
        // combined with the intermediate result of the preceding operation.
        ew_params.operations.push_back({{operand::Buffer(0), operand::Buffer(1)},
                                        convert_to_eltwise_mode(prim->mode)});

        for (uint32_t idx = 2; idx < static_cast<uint32_t>(arg.inputs_count()); idx++) {
            ew_params.operations.push_back({{operand::Intermediate(idx - 2), operand::Buffer(idx)},
                                            convert_to_eltwise_mode(prim->mode)});
        }

        if (prim->mode == eltwise_mode::sum) {
            ew_params.coefficients = prim->coefficients;
        }

        // The first input whose dims differ from the output decides between
        // broadcast and layout-based mode; later inputs are not inspected.
        for (size_t idx = 0; idx < ew_params.inputs.size(); idx++) {
            if (ew_params.inputs[idx].SameDims(ew_params.output)) {
                continue;
            }

            const std::vector<int32_t> in_dims = arg.input(idx).get_output_layout().size.raw.vector();
            const std::vector<int32_t> out_dims = arg.get_output_layout().size.raw.vector();

            bool needs_broadcast = false;
            for (size_t d = 0; d < out_dims.size(); d++) {
                if (out_dims[d] != 1 && in_dims[d] == 1) {
                    needs_broadcast = true;
                }
            }

            if (needs_broadcast) {
                ew_params.broadcast = true;
            } else {
                ew_params.layoutBased = true;
            }
            break;
        }

        // stride
        if (!prim->stride.empty()) {
            const auto& prim_strides = prim->stride;
            ew_params.stride.resize(prim_strides.size());
            for (size_t idx = 0; idx < prim->stride.size(); idx++) {
                ew_params.stride[idx] = {static_cast<uint32_t>(prim_strides[idx].spatial[0]),
                                         static_cast<uint32_t>(prim_strides[idx].spatial[1]),
                                         static_cast<uint32_t>(prim_strides[idx].spatial[2])};
            }
        }

        // check if strides are the same
        if (!ew_params.stride.empty()) {
            const auto& first = ew_params.stride[0];
            for (size_t idx = 1; idx < ew_params.stride.size(); idx++) {
                const auto& current = ew_params.stride[idx];
                const bool same_xy = (first.x == current.x) && (first.y == current.y);
                if (!same_xy) {
                    ew_params.layoutBased = true;
                }
            }
        } else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1])) {
            ew_params.broadcast = true;
        }

        // TODO [LOW PRECISION]: check if this parameter's really needed. Maybe data types are enough
        // int8_quantization is set only if every input is u8 or i8.
        bool all_inputs_int8 = true;
        for (size_t idx = 0; idx < arg.inputs_count(); idx++) {
            const auto input_type = arg.input(idx).get_output_layout().data_type;
            const bool is_int8 = (input_type == data_types::u8) || (input_type == data_types::i8);
            if (!is_int8) {
                all_inputs_int8 = false;
            }
        }
        ew_params.int8_quantization = all_inputs_int8;

        auto& selector = kernel_selector::eltwise_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(ew_params, ew_optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new eltwise_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers eltwise_impl::create in the implementation map under
// impl_types::ocl for the (data type, format) pairs listed below.
attach_eltwise_impl::attach_eltwise_impl() {
    const auto factory = eltwise_impl::create;

    implementation_map<eltwise>::add(impl_types::ocl, factory, {
        // yxfb (the u8 entry in this group is registered for bfyx)
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::i8, format::yxfb),
        std::make_tuple(data_types::i32, format::yxfb),
        std::make_tuple(data_types::i64, format::yxfb),
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::i64, format::bfyx),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
        std::make_tuple(data_types::i32, format::byxf),
        std::make_tuple(data_types::i64, format::byxf),
        // b_fs_yx_fsv16
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        std::make_tuple(data_types::i64, format::bfzyx),
        // bfwzyx
        std::make_tuple(data_types::f32, format::bfwzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        std::make_tuple(data_types::i8, format::bfwzyx),
        std::make_tuple(data_types::u8, format::bfwzyx),
        std::make_tuple(data_types::i32, format::bfwzyx),
        std::make_tuple(data_types::i64, format::bfwzyx),
        // b_fs_zyx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i64, format::b_fs_zyx_fsv16),
        // bs_fs_zyx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i64, format::bs_fs_zyx_bsv16_fsv16),
        // bs_fs_yx_bsv16_fsv16
        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
        // b_fs_yx_fsv4
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
        // b_fs_yx_fsv32
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
        // b_fs_zyx_fsv32
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
        // fs_b_yx_fsv32
        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "embedding_bag_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "embedding_bag/embedding_bag_kernel_selector.h"
|
||||
#include "embedding_bag/embedding_bag_kernel_ref.h"
|
||||
@ -14,13 +14,13 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
struct embedding_bag_gpu : typed_primitive_gpu_impl<embedding_bag> {
|
||||
using parent = typed_primitive_gpu_impl<embedding_bag>;
|
||||
namespace ocl {
|
||||
struct embedding_bag_impl : typed_primitive_impl_ocl<embedding_bag> {
|
||||
using parent = typed_primitive_impl_ocl<embedding_bag>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<embedding_bag_gpu>(*this);
|
||||
return make_unique<embedding_bag_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -58,7 +58,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto embedding_bag = new embedding_bag_gpu(arg, best_kernels[0]);
|
||||
auto embedding_bag = new embedding_bag_impl(arg, best_kernels[0]);
|
||||
|
||||
return embedding_bag;
|
||||
}
|
||||
@ -66,12 +66,13 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_embedding_bag_gpu::attach_embedding_bag_gpu() {
|
||||
auto val_fw = embedding_bag_gpu::create;
|
||||
implementation_map<embedding_bag>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<embedding_bag>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
attach_embedding_bag_impl::attach_embedding_bag_impl() {
|
||||
implementation_map<embedding_bag>::add(impl_types::ocl, embedding_bag_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "extract_image_patches_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
|
||||
@ -12,14 +12,14 @@
|
||||
#include "extract_image_patches/extract_image_patches_kernel_ref.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct extract_image_patches_gpu : typed_primitive_gpu_impl<extract_image_patches> {
|
||||
using parent = typed_primitive_gpu_impl<extract_image_patches>;
|
||||
struct extract_image_patches_impl : typed_primitive_impl_ocl<extract_image_patches> {
|
||||
using parent = typed_primitive_impl_ocl<extract_image_patches>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<extract_image_patches_gpu>(*this);
|
||||
return make_unique<extract_image_patches_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -41,7 +41,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto extract_image_patches = new extract_image_patches_gpu(arg, best_kernels[0]);
|
||||
auto extract_image_patches = new extract_image_patches_impl(arg, best_kernels[0]);
|
||||
|
||||
return extract_image_patches;
|
||||
}
|
||||
@ -49,16 +49,17 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_extract_image_patches_gpu::attach_extract_image_patches_gpu() {
|
||||
implementation_map<extract_image_patches>::add(
|
||||
{{std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), extract_image_patches_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), extract_image_patches_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), extract_image_patches_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), extract_image_patches_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), extract_image_patches_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), extract_image_patches_gpu::create}});
|
||||
attach_extract_image_patches_impl::attach_extract_image_patches_impl() {
|
||||
implementation_map<extract_image_patches>::add(impl_types::ocl, extract_image_patches_impl::create, {
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::i64, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -5,8 +5,8 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "fully_connected_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "fully_connected/fully_connected_kernel_selector.h"
|
||||
#include "fully_connected/fully_connected_params.h"
|
||||
@ -20,14 +20,14 @@
|
||||
#include <memory>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct fully_connected_gpu : typed_primitive_gpu_impl<fully_connected> {
|
||||
using parent = typed_primitive_gpu_impl<fully_connected>;
|
||||
struct fully_connected_impl : typed_primitive_impl_ocl<fully_connected> {
|
||||
using parent = typed_primitive_impl_ocl<fully_connected>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<fully_connected_gpu>(*this);
|
||||
return make_unique<fully_connected_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -71,7 +71,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto fc = new fully_connected_gpu(arg, best_kernels[0]);
|
||||
auto fc = new fully_connected_impl(arg, best_kernels[0]);
|
||||
|
||||
return fc;
|
||||
}
|
||||
@ -79,34 +79,29 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_fully_connected_gpu::attach_fully_connected_gpu() {
|
||||
auto val_fw = fully_connected_gpu::create;
|
||||
|
||||
implementation_map<fully_connected>::add({
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw},
|
||||
// MMAD
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), val_fw},
|
||||
// IMAD
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), val_fw},
|
||||
// fs_b_yx_fsv32
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), val_fw},
|
||||
attach_fully_connected_impl::attach_fully_connected_impl() {
|
||||
implementation_map<fully_connected>::add(impl_types::ocl, fully_connected_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "fused_conv_eltwise_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "kernel_runner.h"
|
||||
@ -14,14 +14,14 @@
|
||||
#include <memory>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct fused_conv_eltwise_gpu : typed_primitive_gpu_impl<fused_conv_eltwise> {
|
||||
using parent = typed_primitive_gpu_impl<fused_conv_eltwise>;
|
||||
struct fused_conv_eltwise_impl : typed_primitive_impl_ocl<fused_conv_eltwise> {
|
||||
using parent = typed_primitive_impl_ocl<fused_conv_eltwise>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<fused_conv_eltwise_gpu>(*this);
|
||||
return make_unique<fused_conv_eltwise_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -139,7 +139,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto conv = new fused_conv_eltwise_gpu(arg, best_kernels[0]);
|
||||
auto conv = new fused_conv_eltwise_impl(arg, best_kernels[0]);
|
||||
|
||||
return conv;
|
||||
}
|
||||
@ -147,42 +147,27 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_fused_conv_eltwise_gpu::attach_fused_conv_eltwise_gpu() {
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
// IMAD
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::image_2d_rgba),
|
||||
fused_conv_eltwise_gpu::create);
|
||||
attach_fused_conv_eltwise_impl::attach_fused_conv_eltwise_impl() {
|
||||
implementation_map<fused_conv_eltwise>::add(impl_types::ocl, fused_conv_eltwise_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i8, format::bfzyx),
|
||||
std::make_tuple(data_types::u8, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::u8, format::image_2d_rgba),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "gather_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "gather/gather_kernel_selector.h"
|
||||
#include "gather/gather_kernel_ref.h"
|
||||
@ -13,7 +13,7 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
kernel_selector::gather_axis convert_axis(gather::gather_axis axis) {
|
||||
switch (axis) {
|
||||
case gather::along_x:
|
||||
@ -33,12 +33,12 @@ kernel_selector::gather_axis convert_axis(gather::gather_axis axis) {
|
||||
}
|
||||
}
|
||||
|
||||
struct gather_gpu : typed_primitive_gpu_impl<gather> {
|
||||
using parent = typed_primitive_gpu_impl<gather>;
|
||||
struct gather_impl : typed_primitive_impl_ocl<gather> {
|
||||
using parent = typed_primitive_impl_ocl<gather>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<gather_gpu>(*this);
|
||||
return make_unique<gather_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -61,7 +61,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto gather = new gather_gpu(arg, best_kernels[0]);
|
||||
auto gather = new gather_impl(arg, best_kernels[0]);
|
||||
|
||||
return gather;
|
||||
}
|
||||
@ -69,21 +69,20 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_gather_gpu::attach_gather_gpu() {
|
||||
auto val_fw = gather_gpu::create;
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
|
||||
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
|
||||
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
|
||||
implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
|
||||
attach_gather_impl::attach_gather_impl() {
|
||||
implementation_map<gather>::add(impl_types::ocl, gather_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i32, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
||||
std::make_tuple(data_types::i32, format::bfwzyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
67
inference-engine/thirdparty/clDNN/src/impls/ocl/gather_nd.cpp
vendored
Normal file
67
inference-engine/thirdparty/clDNN/src/impls/ocl/gather_nd.cpp
vendored
Normal file
@ -0,0 +1,67 @@
|
||||
// Copyright (C) 2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "gather_nd_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "gather/gather_nd_kernel_selector.h"
|
||||
#include "gather/gather_nd_kernel_ref.h"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// @brief OCL implementation wrapper for the gather_nd primitive.
struct gather_nd_impl : typed_primitive_impl_ocl<gather_nd> {
    using parent = typed_primitive_impl_ocl<gather_nd>;
    using parent::parent;

    /// @brief Produces a copy of this implementation owned by the caller.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<gather_nd_impl>(*this);
    }

    /// @brief Fills kernel-selector parameters from @p arg, asks the gather_nd
    ///        kernel selector for its best kernels and wraps the first one.
    /// @param arg Program node describing the gather_nd primitive.
    /// @return Newly allocated implementation (raw pointer, as the factory signature requires).
    static primitive_impl* create(const gather_nd_node& arg) {
        auto params = get_default_params<kernel_selector::gather_nd_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::gather_nd_optional_params>(arg.get_program());

        // Copy the primitive-specific attributes into the kernel parameters.
        const auto& prim = arg.get_primitive();
        params.indices_rank = prim->indices_rank;
        params.batch_dims = prim->batch_dims;

        // Append the layout of input #1 to the kernel inputs.
        params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));

        auto best_kernels =
            kernel_selector::gather_nd_kernel_selector::Instance().GetBestKernels(params, optional_params);

        // Report an error via CLDNN_ERROR_BOOL when the selector returned nothing.
        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new gather_nd_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// @brief Registers gather_nd_impl::create as the OCL factory for every
///        supported (data type, format) pair of the gather_nd primitive.
attach_gather_nd_impl::attach_gather_nd_impl() {
    const auto factory = gather_nd_impl::create;

    implementation_map<gather_nd>::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        // bfwzyx
        std::make_tuple(data_types::f32, format::bfwzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        std::make_tuple(data_types::i32, format::bfwzyx),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -4,22 +4,22 @@
|
||||
|
||||
#include "gather_tree_inst.h"
|
||||
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "gather_tree/gather_tree_kernel_selector.h"
|
||||
#include "gather_tree/gather_tree_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct gather_tree_gpu : typed_primitive_gpu_impl<gather_tree> {
|
||||
using parent = typed_primitive_gpu_impl<gather_tree>;
|
||||
struct gather_tree_impl : typed_primitive_impl_ocl<gather_tree> {
|
||||
using parent = typed_primitive_impl_ocl<gather_tree>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<gather_tree_gpu>(*this);
|
||||
return make_unique<gather_tree_impl>(*this);
|
||||
}
|
||||
|
||||
static primitive_impl* create(const gather_tree_node& arg) {
|
||||
@ -39,22 +39,21 @@ struct gather_tree_gpu : typed_primitive_gpu_impl<gather_tree> {
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
return new gather_tree_gpu(arg, best_kernels[0]);
|
||||
return new gather_tree_impl(arg, best_kernels[0]);
|
||||
}
|
||||
};
|
||||
namespace detail {
|
||||
attach_gather_tree_gpu::attach_gather_tree_gpu() {
|
||||
auto val_fw = gather_tree_gpu::create;
|
||||
|
||||
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
|
||||
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
|
||||
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
|
||||
|
||||
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
|
||||
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<gather_tree>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
|
||||
}
|
||||
attach_gather_tree_impl::attach_gather_tree_impl() {
|
||||
implementation_map<gather_tree>::add(impl_types::ocl, gather_tree_impl::create, {
|
||||
std::make_tuple(data_types::i32, format::yxfb),
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::i32, format::byxf),
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -4,22 +4,22 @@
|
||||
|
||||
#include "gemm_inst.h"
|
||||
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "gemm/gemm_kernel_selector.h"
|
||||
#include "gemm/gemm_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct gemm_gpu : typed_primitive_gpu_impl<gemm> {
|
||||
using parent = typed_primitive_gpu_impl<gemm>;
|
||||
struct gemm_impl : typed_primitive_impl_ocl<gemm> {
|
||||
using parent = typed_primitive_impl_ocl<gemm>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<gemm_gpu>(*this);
|
||||
return make_unique<gemm_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -53,28 +53,29 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
return new gemm_gpu(arg, best_kernels[0]);
|
||||
return new gemm_impl(arg, best_kernels[0]);
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_gemm_gpu::attach_gemm_gpu() {
|
||||
auto val_fw = gemm_gpu::create;
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
|
||||
implementation_map<gemm>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
|
||||
attach_gemm_impl::attach_gemm_impl() {
|
||||
implementation_map<gemm>::add(impl_types::ocl, gemm_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i8, format::bfzyx),
|
||||
std::make_tuple(data_types::u8, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
||||
std::make_tuple(data_types::i8, format::bfwzyx),
|
||||
std::make_tuple(data_types::u8, format::bfwzyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -4,38 +4,37 @@
|
||||
|
||||
#include "generic_layer_inst.h"
|
||||
#include "cldnn/runtime/engine.hpp"
|
||||
#include "implementation_map.h"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "network_impl.h"
|
||||
#include "register_gpu.hpp"
|
||||
#include "register.hpp"
|
||||
#include <vector>
|
||||
|
||||
using namespace cldnn;
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
namespace neural {
|
||||
|
||||
struct generic_layer_gpu : typed_primitive_impl<generic_layer> {
|
||||
struct generic_layer_impl : typed_primitive_impl<generic_layer> {
|
||||
const generic_layer_node& outer;
|
||||
const kernel_selector::cl_kernel_data& _cl_kernel_data;
|
||||
std::vector<kernel::ptr> _kernels;
|
||||
kernel_id _kernel_id;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<generic_layer_gpu>(*this);
|
||||
return make_unique<generic_layer_impl>(*this);
|
||||
}
|
||||
|
||||
generic_layer_gpu(const generic_layer_gpu& other)
|
||||
generic_layer_impl(const generic_layer_impl& other)
|
||||
: outer(other.outer)
|
||||
, _cl_kernel_data(other._cl_kernel_data)
|
||||
, _kernels({})
|
||||
, _kernel_id(other._kernel_id) {
|
||||
if (other._kernels.empty()) {
|
||||
throw std::runtime_error("Can't copy generic_layer_gpu node: kernels vector is empty");
|
||||
throw std::runtime_error("Can't copy generic_layer_impl node: kernels vector is empty");
|
||||
}
|
||||
_kernels.push_back(other._kernels.front()->clone());
|
||||
}
|
||||
|
||||
generic_layer_gpu(const generic_layer_node& arg)
|
||||
generic_layer_impl(const generic_layer_node& arg)
|
||||
: outer(arg)
|
||||
, _cl_kernel_data(*outer.get_primitive()->generic_params.clKernel.get())
|
||||
, _kernels() {
|
||||
@ -109,17 +108,17 @@ struct generic_layer_cpu : typed_primitive_impl<generic_layer> {
|
||||
|
||||
static primitive_impl* create(const generic_layer_node& arg) {
|
||||
if (arg.get_primitive()->generic_params.engine == kernel_selector::generic_kernel_params::Engine::GPU) {
|
||||
return new generic_layer_gpu(arg);
|
||||
return new generic_layer_impl(arg);
|
||||
} else {
|
||||
return new generic_layer_cpu(arg);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace neural
|
||||
namespace detail {
|
||||
attach_generic_layer_impl::attach_generic_layer_impl() {
|
||||
implementation_map<generic_layer>::add(cldnn::impl_types::ocl, create, {});
|
||||
}
|
||||
|
||||
namespace cldnn { namespace gpu { namespace detail {
|
||||
attach_generic_layer_gpu::attach_generic_layer_gpu() {
|
||||
implementation_map<generic_layer>::add({ {cldnn::engine_types::ocl, neural::create} });
|
||||
}
|
||||
|
||||
} } } // namespace cldnn::gpu::detail
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "grn_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "grn/grn_kernel_selector.h"
|
||||
@ -15,14 +15,14 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct grn_gpu : typed_primitive_gpu_impl<grn> {
|
||||
using parent = typed_primitive_gpu_impl<grn>;
|
||||
struct grn_impl : typed_primitive_impl_ocl<grn> {
|
||||
using parent = typed_primitive_impl_ocl<grn>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<grn_gpu>(*this);
|
||||
return make_unique<grn_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -40,7 +40,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto grn = new grn_gpu(arg, best_kernels[0]);
|
||||
auto grn = new grn_impl(arg, best_kernels[0]);
|
||||
|
||||
return grn;
|
||||
}
|
||||
@ -48,11 +48,13 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_grn_gpu::attach_grn_gpu() {
|
||||
implementation_map<grn>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), grn_gpu::create);
|
||||
implementation_map<grn>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), grn_gpu::create);
|
||||
attach_grn_impl::attach_grn_impl() {
|
||||
implementation_map<grn>::add(impl_types::ocl, grn_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
82
inference-engine/thirdparty/clDNN/src/impls/ocl/lrn.cpp
vendored
Normal file
82
inference-engine/thirdparty/clDNN/src/impls/ocl/lrn.cpp
vendored
Normal file
@ -0,0 +1,82 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "lrn_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "lrn/lrn_kernel_selector.h"
|
||||
#include "lrn/lrn_kernel_base.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
// OpenCL implementation of the lrn primitive. Kernel choice is delegated to
// kernel_selector::lrn_kernel_selector; the object itself only wraps the
// selected kernel via the inherited typed_primitive_impl_ocl constructors.
struct lrn_impl : typed_primitive_impl_ocl<lrn> {
    using parent = typed_primitive_impl_ocl<lrn>;
    using parent::parent;

    // Returns a heap-allocated copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<lrn_impl>(*this);
    }

    // Builds the kernel-selector parameters from the node's primitive, asks the
    // selector for matching kernels and wraps the first one.
    // Reports an error through CLDNN_ERROR_BOOL when no kernel is returned.
    static primitive_impl* create(const lrn_node& arg) {
        auto params = get_default_params<kernel_selector::lrn_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::lrn_optional_params>(arg.get_program());

        const auto& prim = arg.get_primitive();

        params.alpha = prim->alpha;
        params.beta = prim->beta;
        params.k = prim->k;
        params.localSize = prim->size;
        params.divMode = kernel_selector::kernel_divider_mode::FIXED;

        // Any region other than "within channel" maps to across-channel mode.
        if (prim->norm_region == lrn_norm_region_within_channel) {
            params.normMode = kernel_selector::lrn_mode::WITHIN_CHANNEL;
        } else {
            params.normMode = kernel_selector::lrn_mode::ACROSS_CHANNEL;
        }

        auto& selector = kernel_selector::lrn_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new lrn_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers lrn_impl::create in implementation_map<lrn> as the impl_types::ocl
// factory for every (data type, layout) pair listed below.
attach_lrn_impl::attach_lrn_impl() {
    const auto factory = lrn_impl::create;

    implementation_map<lrn>::add(impl_types::ocl, factory, {
        // yxfb
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::u8, format::yxfb),
        std::make_tuple(data_types::i8, format::yxfb),
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::u8, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
        // b_fs_yx_fsv4
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
        // b_fs_yx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -5,8 +5,8 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "lstm_dynamic_input_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "lstm_dynamic/lstm_dynamic_input_kernel_selector.h"
|
||||
#include "lstm_dynamic/lstm_dynamic_input_kernel_base.h"
|
||||
@ -14,14 +14,14 @@
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct lstm_dynamic_input_gpu : typed_primitive_gpu_impl<lstm_dynamic_input> {
|
||||
using parent = typed_primitive_gpu_impl<lstm_dynamic_input>;
|
||||
struct lstm_dynamic_input_impl : typed_primitive_impl_ocl<lstm_dynamic_input> {
|
||||
using parent = typed_primitive_impl_ocl<lstm_dynamic_input>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<lstm_dynamic_input_gpu>(*this);
|
||||
return make_unique<lstm_dynamic_input_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -64,7 +64,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto lstm_dynamic = new lstm_dynamic_input_gpu(arg, best_kernels[0]);
|
||||
auto lstm_dynamic = new lstm_dynamic_input_impl(arg, best_kernels[0]);
|
||||
|
||||
return lstm_dynamic;
|
||||
}
|
||||
@ -72,15 +72,13 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_lstm_dynamic_input_gpu::attach_lstm_dynamic_input_gpu() {
|
||||
auto val_fw = lstm_dynamic_input_gpu::create;
|
||||
|
||||
implementation_map<lstm_dynamic_input>::add({
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
|
||||
attach_lstm_dynamic_input_impl::attach_lstm_dynamic_input_impl() {
|
||||
implementation_map<lstm_dynamic_input>::add(impl_types::ocl, lstm_dynamic_input_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -5,8 +5,8 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "lstm_dynamic_timeloop_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "lstm_dynamic/lstm_dynamic_timeloop_kernel_selector.h"
|
||||
#include "lstm_dynamic/lstm_dynamic_timeloop_kernel_base.h"
|
||||
@ -14,14 +14,14 @@
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct lstm_dynamic_timeloop_gpu : typed_primitive_gpu_impl<lstm_dynamic_timeloop> {
|
||||
using parent = typed_primitive_gpu_impl<lstm_dynamic_timeloop>;
|
||||
struct lstm_dynamic_timeloop_impl : typed_primitive_impl_ocl<lstm_dynamic_timeloop> {
|
||||
using parent = typed_primitive_impl_ocl<lstm_dynamic_timeloop>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<lstm_dynamic_timeloop_gpu>(*this);
|
||||
return make_unique<lstm_dynamic_timeloop_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -85,7 +85,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto lstm_dynamic = new lstm_dynamic_timeloop_gpu(arg, best_kernels[0]);
|
||||
auto lstm_dynamic = new lstm_dynamic_timeloop_impl(arg, best_kernels[0]);
|
||||
|
||||
return lstm_dynamic;
|
||||
}
|
||||
@ -93,15 +93,13 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_lstm_dynamic_timeloop_gpu::attach_lstm_dynamic_timeloop_gpu() {
|
||||
auto val_fw = lstm_dynamic_timeloop_gpu::create;
|
||||
|
||||
implementation_map<lstm_dynamic_timeloop>::add({
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
|
||||
attach_lstm_dynamic_timeloop_impl::attach_lstm_dynamic_timeloop_impl() {
|
||||
implementation_map<lstm_dynamic_timeloop>::add(impl_types::ocl, lstm_dynamic_timeloop_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -5,8 +5,8 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "lstm_elt_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "lstm/lstm_elt_kernel_selector.h"
|
||||
#include "lstm/lstm_elt_kernel_base.h"
|
||||
@ -14,14 +14,14 @@
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct lstm_elt_gpu : typed_primitive_gpu_impl<lstm_elt> {
|
||||
using parent = typed_primitive_gpu_impl<lstm_elt>;
|
||||
struct lstm_elt_impl : typed_primitive_impl_ocl<lstm_elt> {
|
||||
using parent = typed_primitive_impl_ocl<lstm_elt>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<lstm_elt_gpu>(*this);
|
||||
return make_unique<lstm_elt_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -85,7 +85,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto lstm_elt = new lstm_elt_gpu(arg, best_kernels[0]);
|
||||
auto lstm_elt = new lstm_elt_impl(arg, best_kernels[0]);
|
||||
|
||||
return lstm_elt;
|
||||
}
|
||||
@ -93,17 +93,15 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_lstm_elt_gpu::attach_lstm_elt_gpu() {
|
||||
auto val_fw = lstm_elt_gpu::create;
|
||||
|
||||
implementation_map<lstm_elt>::add({
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
|
||||
attach_lstm_elt_impl::attach_lstm_elt_impl() {
|
||||
implementation_map<lstm_elt>::add(impl_types::ocl, lstm_elt_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::fyxb),
|
||||
std::make_tuple(data_types::f16, format::fyxb),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -5,8 +5,8 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "lstm_gemm_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "lstm/lstm_gemm_kernel_selector.h"
|
||||
#include "lstm/lstm_gemm_kernel_base.h"
|
||||
@ -14,14 +14,14 @@
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct lstm_gemm_gpu : typed_primitive_gpu_impl<lstm_gemm> {
|
||||
using parent = typed_primitive_gpu_impl<lstm_gemm>;
|
||||
struct lstm_gemm_impl : typed_primitive_impl_ocl<lstm_gemm> {
|
||||
using parent = typed_primitive_impl_ocl<lstm_gemm>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<lstm_gemm_gpu>(*this);
|
||||
return make_unique<lstm_gemm_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -82,7 +82,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto lstm_gemm = new lstm_gemm_gpu(arg, best_kernels[0]);
|
||||
auto lstm_gemm = new lstm_gemm_impl(arg, best_kernels[0]);
|
||||
|
||||
return lstm_gemm;
|
||||
}
|
||||
@ -90,17 +90,15 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_lstm_gemm_gpu::attach_lstm_gemm_gpu() {
|
||||
auto val_fw = lstm_gemm_gpu::create;
|
||||
|
||||
implementation_map<lstm_gemm>::add({
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw},
|
||||
attach_lstm_gemm_impl::attach_lstm_gemm_impl() {
|
||||
implementation_map<lstm_gemm>::add(impl_types::ocl, lstm_gemm_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::fyxb),
|
||||
std::make_tuple(data_types::f16, format::fyxb),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "max_unpooling_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "network_impl.h"
|
||||
#include "kernel_selector_helper.h"
|
||||
@ -13,14 +13,14 @@
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct max_unpooling_gpu : typed_primitive_gpu_impl<max_unpooling> {
|
||||
using parent = typed_primitive_gpu_impl<max_unpooling>;
|
||||
struct max_unpooling_impl : typed_primitive_impl_ocl<max_unpooling> {
|
||||
using parent = typed_primitive_impl_ocl<max_unpooling>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<max_unpooling_gpu>(*this);
|
||||
return make_unique<max_unpooling_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -55,7 +55,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto max_unpool = new max_unpooling_gpu(arg, best_kernels[0]);
|
||||
auto max_unpool = new max_unpooling_impl(arg, best_kernels[0]);
|
||||
|
||||
return max_unpool;
|
||||
}
|
||||
@ -63,27 +63,20 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_max_unpooling_gpu::attach_max_unpooling_gpu() {
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
|
||||
max_unpooling_gpu::create);
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
|
||||
max_unpooling_gpu::create);
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
max_unpooling_gpu::create);
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
|
||||
max_unpooling_gpu::create);
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx),
|
||||
max_unpooling_gpu::create);
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb),
|
||||
max_unpooling_gpu::create);
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
|
||||
max_unpooling_gpu::create);
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
|
||||
max_unpooling_gpu::create);
|
||||
implementation_map<max_unpooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf),
|
||||
max_unpooling_gpu::create);
|
||||
attach_max_unpooling_impl::attach_max_unpooling_impl() {
|
||||
implementation_map<max_unpooling>::add(impl_types::ocl, max_unpooling_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
std::make_tuple(data_types::i8, format::byxf),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
32
inference-engine/thirdparty/clDNN/src/impls/ocl/mutable_data.cpp
vendored
Normal file
32
inference-engine/thirdparty/clDNN/src/impls/ocl/mutable_data.cpp
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "mutable_data_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
// OpenCL-side implementation object for the mutable_data primitive. It adds no
// behaviour of its own beyond cloning and a factory function.
struct mutable_data_impl : public typed_primitive_impl_ocl<mutable_data> {
    using parent = typed_primitive_impl_ocl<mutable_data>;
    using parent::parent;

    // Returns a heap-allocated copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<mutable_data_impl>(*this);
    }

public:
    // Creates the implementation for the given node. The second constructor
    // argument is an empty brace-initializer; no kernel selection happens here.
    static primitive_impl* create(mutable_data_node const& arg) {
        return new mutable_data_impl(arg, {});
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers mutable_data_impl::create in implementation_map<mutable_data> as
// the impl_types::ocl factory. The type/layout list is passed empty.
// NOTE(review): presumably an empty list means "no type/layout restriction" -
// confirm against implementation_map<T>::add.
attach_mutable_data_impl::attach_mutable_data_impl() {
    const auto factory = mutable_data_impl::create;

    implementation_map<mutable_data>::add(impl_types::ocl, factory, {});
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
86
inference-engine/thirdparty/clDNN/src/impls/ocl/mvn.cpp
vendored
Normal file
86
inference-engine/thirdparty/clDNN/src/impls/ocl/mvn.cpp
vendored
Normal file
@ -0,0 +1,86 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "mvn_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "mvn/mvn_kernel_selector.h"
|
||||
#include "mvn/mvn_kernel_base.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
// OpenCL implementation of the mvn primitive. Kernel choice is delegated to
// kernel_selector::mvn_kernel_selector.
struct mvn_impl : typed_primitive_impl_ocl<mvn> {
    using parent = typed_primitive_impl_ocl<mvn>;
    using parent::parent;

    // Returns a heap-allocated copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<mvn_impl>(*this);
    }

public:
    // Translates the primitive's settings into kernel-selector parameters,
    // queries the selector and wraps the first kernel it returns.
    // Reports an error through CLDNN_ERROR_BOOL when no kernel is returned.
    static primitive_impl* create(const mvn_node& arg) {
        auto params = get_default_params<kernel_selector::mvn_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::mvn_optional_params>(arg.get_program());

        const auto& prim = arg.get_primitive();

        if (prim->across_channels) {
            params.mvnMode = kernel_selector::mvn_mode::ACROSS_CHANNELS;
        } else {
            params.mvnMode = kernel_selector::mvn_mode::WITHIN_CHANNELS;
        }
        params.mvnNormalizeVariance = prim->normalize_variance;
        params.epsilon = prim->epsilon;

        if (prim->eps_inside_sqrt) {
            params.mvnEpsMode = kernel_selector::mvn_eps_mode::INSIDE_SQRT;
        } else {
            params.mvnEpsMode = kernel_selector::mvn_eps_mode::OUTSIDE_SQRT;
        }

        auto& selector = kernel_selector::mvn_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new mvn_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers mvn_impl::create in implementation_map<mvn> as the impl_types::ocl
// factory for every (data type, layout) pair listed below.
attach_mvn_impl::attach_mvn_impl() {
    const auto factory = mvn_impl::create;

    implementation_map<mvn>::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        // yxfb (floating point only)
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        // byxf (floating point only)
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        // bfzyx
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),
        // b_fs_zyx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
        // bs_fs_zyx_bsv16_fsv16 (floating point only)
        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
        // b_fs_yx_fsv16
        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
83
inference-engine/thirdparty/clDNN/src/impls/ocl/normalize.cpp
vendored
Normal file
83
inference-engine/thirdparty/clDNN/src/impls/ocl/normalize.cpp
vendored
Normal file
@ -0,0 +1,83 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "normalize_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "normalize/normalize_kernel_selector.h"
|
||||
#include "normalize/normalize_kernel_base.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
// OpenCL implementation of the normalize primitive. Kernel choice is delegated
// to kernel_selector::normalize_kernel_selector.
struct normalize_impl : typed_primitive_impl_ocl<normalize> {
    using parent = typed_primitive_impl_ocl<normalize>;
    using parent::parent;

    // Returns a heap-allocated copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<normalize_impl>(*this);
    }

protected:
    // Extends the parent's kernel arguments with the instance's scale memory.
    kernel_arguments_data get_arguments(typed_primitive_inst<normalize>& instance, int32_t split) const override {
        kernel_arguments_data kernel_args = parent::get_arguments(instance, split);
        kernel_args.scale_table = instance.scale_memory();
        return kernel_args;
    }

public:
    // Translates the primitive's settings and the scale input's layout into
    // kernel-selector parameters, queries the selector and wraps the first
    // kernel it returns.
    // Reports an error through CLDNN_ERROR_BOOL when no kernel is returned.
    static primitive_impl* create(const normalize_node& arg) {
        auto params = get_default_params<kernel_selector::normalize_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::normalize_optional_params>(arg.get_program());

        const auto& scale_out_layout = arg.scale().get_output_layout();

        if (arg.get_primitive()->across_spatial) {
            params.normMode = kernel_selector::normalize_mode::ACROSS_SPATIAL;
        } else {
            params.normMode = kernel_selector::normalize_mode::WITHIN_SPATIAL;
        }
        params.epsilon = arg.get_primitive()->epsilon;
        params.scaleTable = convert_data_tensor(scale_out_layout).FlattenFeatureAndSpatials();

        auto& selector = kernel_selector::normalize_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new normalize_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers normalize_impl::create in implementation_map<normalize> as the
// impl_types::ocl factory for every (data type, layout) pair listed below.
attach_normalize_impl::attach_normalize_impl() {
    const auto factory = normalize_impl::create;

    implementation_map<normalize>::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        // yxfb
        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),
        std::make_tuple(data_types::i8, format::yxfb),
        std::make_tuple(data_types::u8, format::yxfb),
        // byxf
        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),
        std::make_tuple(data_types::u8, format::byxf),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
74
inference-engine/thirdparty/clDNN/src/impls/ocl/one_hot.cpp
vendored
Normal file
74
inference-engine/thirdparty/clDNN/src/impls/ocl/one_hot.cpp
vendored
Normal file
@ -0,0 +1,74 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "one_hot_inst.h"
|
||||
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "one_hot/one_hot_kernel_selector.h"
|
||||
#include "one_hot/one_hot_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
// OpenCL implementation of the one_hot primitive. Kernel choice is delegated
// to kernel_selector::one_hot_kernel_selector.
struct one_hot_impl : typed_primitive_impl_ocl<one_hot> {
    using parent = typed_primitive_impl_ocl<one_hot>;
    using parent::parent;

    // Returns a heap-allocated copy of this implementation.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<one_hot_impl>(*this);
    }

    // Translates the primitive's settings into kernel-selector parameters,
    // queries the selector and wraps the first kernel it returns.
    // Reports an error through CLDNN_ERROR_BOOL when no kernel is returned.
    static primitive_impl* create(const one_hot_node& arg) {
        auto params = get_default_params<kernel_selector::one_hot_params>(arg, 1);
        auto optional_params =
            get_default_optional_params<kernel_selector::one_hot_optional_params>(arg.get_program());

        params.one_hot_axis = arg.get_primitive()->one_hot_axis;
        params.on_value = arg.get_primitive()->on_value;
        params.off_value = arg.get_primitive()->off_value;

        // Read the output dimensions in bfzyx order for bfzyx outputs and in
        // bfyx order for everything else, then take the extent along the
        // one-hot axis as the limit.
        const bool output_is_bfzyx = arg.get_output_layout().format == format::bfzyx;
        const auto output_dims = output_is_bfzyx ? arg.get_output_layout().size.sizes(format::bfzyx)
                                                 : arg.get_output_layout().size.sizes(format::bfyx);

        params.one_hot_limit = output_dims[params.one_hot_axis];

        auto& selector = kernel_selector::one_hot_kernel_selector::Instance();
        auto best_kernels = selector.GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with these arguments");

        auto* impl = new one_hot_impl(arg, best_kernels[0]);
        return impl;
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Registers one_hot_impl::create in implementation_map<one_hot> as the
// impl_types::ocl factory for every (data type, layout) pair listed below.
attach_one_hot_impl::attach_one_hot_impl() {
    const auto factory = one_hot_impl::create;

    implementation_map<one_hot>::add(impl_types::ocl, factory, {
        // bfyx
        std::make_tuple(data_types::i8, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::i64, format::bfyx),
        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        // bfzyx
        std::make_tuple(data_types::i8, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        std::make_tuple(data_types::i64, format::bfzyx),
        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "permute_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "permute/permute_kernel_selector.h"
|
||||
@ -13,14 +13,14 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct permute_gpu : typed_primitive_gpu_impl<permute> {
|
||||
using parent = typed_primitive_gpu_impl<permute>;
|
||||
struct permute_impl : typed_primitive_impl_ocl<permute> {
|
||||
using parent = typed_primitive_impl_ocl<permute>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<permute_gpu>(*this);
|
||||
return make_unique<permute_impl>(*this);
|
||||
}
|
||||
|
||||
static primitive_impl* create(const permute_node& arg) {
|
||||
@ -38,7 +38,7 @@ struct permute_gpu : typed_primitive_gpu_impl<permute> {
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto permute = new permute_gpu(arg, best_kernels[0]);
|
||||
auto permute = new permute_impl(arg, best_kernels[0]);
|
||||
|
||||
return permute;
|
||||
}
|
||||
@ -46,12 +46,10 @@ struct permute_gpu : typed_primitive_gpu_impl<permute> {
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_permute_gpu::attach_permute_gpu() {
|
||||
implementation_map<permute>::add({
|
||||
{engine_types::ocl, permute_gpu::create},
|
||||
});
|
||||
attach_permute_impl::attach_permute_impl() {
|
||||
implementation_map<permute>::add(impl_types::ocl, permute_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "pooling_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "pooling/pooling_kernel_selector.h"
|
||||
@ -12,7 +12,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
namespace {
|
||||
void validate_args(const pooling_node& arg) {
|
||||
@ -64,12 +64,12 @@ kernel_selector::kernel_divider_mode cldnn_2_kernel_divider_mode(pooling_mode mo
|
||||
}
|
||||
} // namespace
|
||||
|
||||
struct pooling_gpu : typed_primitive_gpu_impl<pooling> {
|
||||
using parent = typed_primitive_gpu_impl<pooling>;
|
||||
struct pooling_impl : typed_primitive_impl_ocl<pooling> {
|
||||
using parent = typed_primitive_impl_ocl<pooling>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<pooling_gpu>(*this);
|
||||
return make_unique<pooling_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -148,7 +148,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto pool = new pooling_gpu(arg, best_kernels[0]);
|
||||
auto pool = new pooling_impl(arg, best_kernels[0]);
|
||||
|
||||
return pool;
|
||||
}
|
||||
@ -156,66 +156,57 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_pooling_gpu::attach_pooling_gpu() {
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_yx_bsv16_fsv16), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv16), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bs_fs_zyx_bsv16_fsv16), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv32), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv32), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv32), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv32), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_zyx_fsv32), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_zyx_fsv32), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_zyx_fsv32), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_zyx_fsv32), pooling_gpu::create);
|
||||
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), pooling_gpu::create);
|
||||
implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fs_b_yx_fsv32), pooling_gpu::create);
|
||||
// Constructor of the pooling registration helper: registers pooling_impl::create
// with implementation_map<pooling> under impl_types::ocl for every
// (data type, format) pair listed below. Each pair appears exactly once.
attach_pooling_impl::attach_pooling_impl() {
|
||||
implementation_map<pooling>::add(impl_types::ocl, pooling_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::i8, format::yxfb),
|
||||
std::make_tuple(data_types::u8, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
std::make_tuple(data_types::i8, format::byxf),
|
||||
std::make_tuple(data_types::u8, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i8, format::bfzyx),
|
||||
std::make_tuple(data_types::u8, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
|
||||
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
|
||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
|
||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
|
||||
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
|
||||
std::make_tuple(data_types::f32, format::fs_b_yx_fsv32),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -2,11 +2,11 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include <list>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
bool is_user_cpu(const program_node* user) {
|
||||
if (user->can_be_optimized()) {
|
||||
@ -28,5 +28,5 @@ bool is_any_user_cpu(const std::list<const program_node*>& users) {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} // namespace gpu
|
||||
} // namespace cldnn
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -11,30 +11,30 @@
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "network_impl.h"
|
||||
#include "register_gpu.hpp"
|
||||
#include "register.hpp"
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <utility>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
// checks if any user in a list is a cpu primitive
|
||||
bool is_any_user_cpu(const std::list<const program_node*>& users);
|
||||
|
||||
/*
|
||||
Base class for all GPU implementations of the specified primitive type.
|
||||
For example, all gpu convolution implementations should derive from typed_primitive_gpu_impl<convolution>.
|
||||
For example, all gpu convolution implementations should derive from typed_primitive_impl_ocl<convolution>.
|
||||
*/
|
||||
template <class PType>
|
||||
struct typed_primitive_gpu_impl : public typed_primitive_impl<PType> {
|
||||
struct typed_primitive_impl_ocl : public typed_primitive_impl<PType> {
|
||||
const typed_program_node<PType>& _outer;
|
||||
kernel_selector::kernel_data _kernel_data;
|
||||
std::vector<kernel_id> _kernel_ids;
|
||||
std::vector<kernel::ptr> _kernels;
|
||||
std::vector<memory::cptr> _intermediates_memory;
|
||||
|
||||
typed_primitive_gpu_impl(const typed_primitive_gpu_impl<PType>& other)
|
||||
typed_primitive_impl_ocl(const typed_primitive_impl_ocl<PType>& other)
|
||||
: typed_primitive_impl<PType>(other._weights_reorder_params, other._kernel_name)
|
||||
, _outer(other._outer)
|
||||
, _kernel_data(other._kernel_data)
|
||||
@ -52,7 +52,7 @@ struct typed_primitive_gpu_impl : public typed_primitive_impl<PType> {
|
||||
}
|
||||
}
|
||||
|
||||
typed_primitive_gpu_impl(const typed_program_node<PType>& arg, const kernel_selector::kernel_data& kd)
|
||||
typed_primitive_impl_ocl(const typed_program_node<PType>& arg, const kernel_selector::kernel_data& kd)
|
||||
: typed_primitive_impl<PType>(kd.weightsReorderParams, kd.kernelName),
|
||||
_outer(arg),
|
||||
_kernel_data(kd) {
|
||||
@ -199,5 +199,5 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -2,8 +2,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "pyramid_roi_align/pyramid_roi_align_kernel_selector.h"
|
||||
#include "pyramid_roi_align/pyramid_roi_align_kernel_base.h"
|
||||
@ -14,14 +14,14 @@
|
||||
#include <cmath>
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct pyramid_roi_align_gpu : typed_primitive_gpu_impl<pyramid_roi_align> {
|
||||
using parent = typed_primitive_gpu_impl<pyramid_roi_align>;
|
||||
struct pyramid_roi_align_impl : typed_primitive_impl_ocl<pyramid_roi_align> {
|
||||
using parent = typed_primitive_impl_ocl<pyramid_roi_align>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<pyramid_roi_align_gpu>(*this);
|
||||
return make_unique<pyramid_roi_align_impl>(*this);
|
||||
}
|
||||
|
||||
static primitive_impl* create(const pyramid_roi_align_node& arg) {
|
||||
@ -54,28 +54,23 @@ struct pyramid_roi_align_gpu : typed_primitive_gpu_impl<pyramid_roi_align> {
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
return new pyramid_roi_align_gpu(arg, best_kernels[0]);
|
||||
return new pyramid_roi_align_impl(arg, best_kernels[0]);
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_pyramid_roi_align_gpu::attach_pyramid_roi_align_gpu() {
|
||||
auto val_fw = pyramid_roi_align_gpu::create;
|
||||
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
val_fw);
|
||||
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
|
||||
val_fw);
|
||||
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf),
|
||||
val_fw);
|
||||
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
|
||||
val_fw);
|
||||
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb),
|
||||
val_fw);
|
||||
implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf),
|
||||
val_fw);
|
||||
// Constructor of the pyramid_roi_align registration helper: registers
// pyramid_roi_align_impl::create with implementation_map<pyramid_roi_align>
// under impl_types::ocl for f32 and f16 data in the bfyx, yxfb and byxf formats.
attach_pyramid_roi_align_impl::attach_pyramid_roi_align_impl() {
|
||||
implementation_map<pyramid_roi_align>::add(impl_types::ocl, pyramid_roi_align_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
160
inference-engine/thirdparty/clDNN/src/impls/ocl/quantize.cpp
vendored
Normal file
160
inference-engine/thirdparty/clDNN/src/impls/ocl/quantize.cpp
vendored
Normal file
@ -0,0 +1,160 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "quantize_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "quantize/quantize_kernel_selector.h"
|
||||
#include "quantize/quantize_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
/// OCL implementation of the quantize primitive. Instances are produced by
/// create(), which picks a kernel through the quantize kernel selector.
struct quantize_impl : typed_primitive_impl_ocl<quantize> {
    using parent = typed_primitive_impl_ocl<quantize>;
    using parent::parent;

    /// Returns a copy of this implementation wrapped in a unique_ptr.
    std::unique_ptr<primitive_impl> clone() const override {
        return make_unique<quantize_impl>(*this);
    }

protected:
    /// Collects the kernel arguments for one execution: every input memory of
    /// the instance, then (only when the node has the scale/shift optimization
    /// enabled and exactly 9 dependencies) dependencies 5 through 8, and
    /// finally the output memory.
    kernel_arguments_data get_arguments(typed_primitive_inst<quantize>& instance, int32_t) const override {
        kernel_arguments_data kernel_args;

        for (size_t idx = 0; idx < instance.inputs_memory_count(); ++idx) {
            kernel_args.inputs.push_back(instance.input_memory_ptr(idx));
        }

        // The extra four dependencies are bound only in the 9-dependency
        // scale/shift configuration; short-circuit keeps the original order
        // of evaluation (option first, dependency count second).
        const bool bind_extra_dependencies =
            instance.node.get_scale_shift_opt() && instance.node.get_dependencies().size() == 9;
        if (bind_extra_dependencies) {
            kernel_args.inputs.push_back(instance.dep_memory_ptr(5));
            kernel_args.inputs.push_back(instance.dep_memory_ptr(6));
            kernel_args.inputs.push_back(instance.dep_memory_ptr(7));
            kernel_args.inputs.push_back(instance.dep_memory_ptr(8));
        }

        kernel_args.output = instance.output_memory_ptr();
        return kernel_args;
    }

public:
    /// Factory registered with implementation_map<quantize>.
    /// Fills the kernel-selector parameters from @p arg, queries the quantize
    /// kernel selector and wraps the first returned kernel in a new
    /// quantize_impl. An empty kernel list is reported through CLDNN_ERROR_BOOL.
    /// @return raw pointer to the newly allocated implementation
    ///         (NOTE(review): ownership presumably passes to the caller — confirm).
    static primitive_impl* create(const quantize_node& arg) {
        auto params = get_default_params<kernel_selector::quantize_params>(arg);
        auto optional_params =
            get_default_optional_params<kernel_selector::quantize_optional_params>(arg.get_program());

        // Scalar options copied straight from the node.
        params.levels = arg.get_levels();
        params.packed_binary_output = arg.get_packed_binary_output();
        params.scale_shift_opt = arg.get_scale_shift_opt();
        params.has_post_scale = arg.get_need_post_scale();
        params.has_post_shift = arg.get_need_post_shift();
        params.has_pre_shift = arg.get_need_pre_shift();
        params.has_clamp = arg.get_need_clamp();

        // Per-tensor flags.
        params.per_tensor_input_range = arg.get_per_tensor_input_range();
        params.per_tensor_input_scale = arg.get_per_tensor_input_scale();
        params.per_tensor_input_shift = arg.get_per_tensor_input_shift();
        params.per_tensor_output_scale = arg.get_per_tensor_output_scale();
        params.per_tensor_output_shift = arg.get_per_tensor_output_shift();

        // Range, scale and shift values.
        params.in_lo = arg.get_input_lo_val();
        params.in_hi = arg.get_input_hi_val();
        params.in_scale = arg.get_input_scale_val();
        params.in_shift = arg.get_input_shift_val();
        params.out_scale = arg.get_output_scale_val();
        params.out_shift = arg.get_output_shift_val();

        // Inputs from index 1 onwards are appended here; index 0 is not added by this loop.
        for (size_t input_idx = 1; input_idx < arg.inputs_count(); ++input_idx) {
            params.inputs.push_back(convert_data_tensor(arg.input(input_idx).get_output_layout()));
        }
        params.output = convert_data_tensor(arg.get_output_layout());

        auto best_kernels =
            kernel_selector::quantize_kernel_selector::Instance().GetBestKernels(params, optional_params);

        CLDNN_ERROR_BOOL(arg.id(),
                         "Best_kernel.empty()",
                         best_kernels.empty(),
                         "Cannot find a proper kernel with this arguments");

        return new quantize_impl(arg, best_kernels[0]);
    }
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Constructor of the quantize registration helper: registers
// quantize_impl::create with implementation_map<quantize> under impl_types::ocl
// for the (data type, format) pairs listed below.
//
// Every pair is listed exactly once. The previous list repeated seven pairs
// (byxf with f32/f16/u8/i8 and bs_fs_yx_bsv16_fsv16 with f32/i8/u8); the set of
// distinct registered pairs is unchanged.
attach_quantize_impl::attach_quantize_impl() {
    implementation_map<quantize>::add(impl_types::ocl, quantize_impl::create, {
        std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
        std::make_tuple(data_types::f32, format::fs_b_yx_fsv32),
        std::make_tuple(data_types::i8, format::fs_b_yx_fsv32),
        std::make_tuple(data_types::u8, format::fs_b_yx_fsv32),

        std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),

        std::make_tuple(data_types::f32, format::byxf),
        std::make_tuple(data_types::f16, format::byxf),
        std::make_tuple(data_types::u8, format::byxf),
        std::make_tuple(data_types::i8, format::byxf),

        std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),

        std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
        std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),

        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
        std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
        std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),

        std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::u8, format::bs_fs_yx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_yx_bsv16_fsv16),

        std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::u8, format::bs_fs_zyx_bsv16_fsv16),
        std::make_tuple(data_types::i8, format::bs_fs_zyx_bsv16_fsv16),

        std::make_tuple(data_types::f32, format::bfyx),
        std::make_tuple(data_types::f16, format::bfyx),
        std::make_tuple(data_types::i32, format::bfyx),
        std::make_tuple(data_types::u8, format::bfyx),
        std::make_tuple(data_types::i8, format::bfyx),

        // byxf with f32/f16/u8/i8 is already listed above; only i32 is new here.
        std::make_tuple(data_types::i32, format::byxf),

        std::make_tuple(data_types::f32, format::yxfb),
        std::make_tuple(data_types::f16, format::yxfb),

        std::make_tuple(data_types::f32, format::bfzyx),
        std::make_tuple(data_types::f16, format::bfzyx),
        std::make_tuple(data_types::i32, format::bfzyx),
        std::make_tuple(data_types::u8, format::bfzyx),
        std::make_tuple(data_types::i8, format::bfzyx),

        std::make_tuple(data_types::f32, format::bfwzyx),
        std::make_tuple(data_types::f16, format::bfwzyx),
        std::make_tuple(data_types::i32, format::bfwzyx),
        std::make_tuple(data_types::u8, format::bfwzyx),
        std::make_tuple(data_types::i8, format::bfwzyx),

        std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
        std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
    });
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "reduce_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "reduce/reduce_kernel_selector.h"
|
||||
#include "reduce/reduce_kernel_ref.h"
|
||||
@ -15,7 +15,7 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
namespace {
|
||||
kernel_selector::reduce_mode cldnn_2_reduce_mode(reduce_mode mode) {
|
||||
switch (mode) {
|
||||
@ -49,12 +49,12 @@ kernel_selector::reduce_mode cldnn_2_reduce_mode(reduce_mode mode) {
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
struct reduce_gpu : typed_primitive_gpu_impl<reduce> {
|
||||
using parent = typed_primitive_gpu_impl<reduce>;
|
||||
struct reduce_impl : typed_primitive_impl_ocl<reduce> {
|
||||
using parent = typed_primitive_impl_ocl<reduce>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<reduce_gpu>(*this);
|
||||
return make_unique<reduce_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -71,7 +71,7 @@ public:
|
||||
|
||||
CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto reduce = new reduce_gpu(arg, best_kernels[0]);
|
||||
auto reduce = new reduce_impl(arg, best_kernels[0]);
|
||||
|
||||
return reduce;
|
||||
}
|
||||
@ -79,30 +79,31 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_reduce_gpu::attach_reduce_gpu() {
|
||||
auto val_fw = reduce_gpu::create;
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfwzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfwzyx), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::b_fs_yx_fsv16), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), val_fw);
|
||||
implementation_map<reduce>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), val_fw);
|
||||
// Constructor of the reduce registration helper: registers reduce_impl::create
// with implementation_map<reduce> under impl_types::ocl for f32, f16, i32, i8
// and u8 data in the bfyx, bfzyx, bfwzyx and b_fs_yx_fsv16 formats.
attach_reduce_impl::attach_reduce_impl() {
|
||||
implementation_map<reduce>::add(impl_types::ocl, reduce_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i32, format::bfzyx),
|
||||
std::make_tuple(data_types::i8, format::bfzyx),
|
||||
std::make_tuple(data_types::u8, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
||||
std::make_tuple(data_types::i32, format::bfwzyx),
|
||||
std::make_tuple(data_types::i8, format::bfwzyx),
|
||||
std::make_tuple(data_types::u8, format::bfwzyx),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,22 +3,22 @@
|
||||
//
|
||||
|
||||
#include "region_yolo_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "region_yolo/region_yolo_kernel_selector.h"
|
||||
#include "region_yolo/region_yolo_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
|
||||
using parent = typed_primitive_gpu_impl<region_yolo>;
|
||||
struct region_yolo_impl : typed_primitive_impl_ocl<region_yolo> {
|
||||
using parent = typed_primitive_impl_ocl<region_yolo>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<region_yolo_gpu>(*this);
|
||||
return make_unique<region_yolo_impl>(*this);
|
||||
}
|
||||
|
||||
static primitive_impl* create(const region_yolo_node& arg) {
|
||||
@ -41,7 +41,7 @@ struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto region_yolo_node = new region_yolo_gpu(arg, best_kernels[0]);
|
||||
auto region_yolo_node = new region_yolo_impl(arg, best_kernels[0]);
|
||||
|
||||
return region_yolo_node;
|
||||
}
|
||||
@ -49,14 +49,15 @@ struct region_yolo_gpu : typed_primitive_gpu_impl<region_yolo> {
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_region_yolo_gpu::attach_region_yolo_gpu() {
|
||||
implementation_map<region_yolo>::add(
|
||||
{{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), region_yolo_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), region_yolo_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), region_yolo_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), region_yolo_gpu::create}});
|
||||
// Constructor of the region_yolo registration helper: registers
// region_yolo_impl::create with implementation_map<region_yolo> under
// impl_types::ocl for f32 and f16 data in the bfyx and byxf formats.
attach_region_yolo_impl::attach_region_yolo_impl() {
|
||||
implementation_map<region_yolo>::add(impl_types::ocl, region_yolo_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
79
inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp
vendored
Normal file
79
inference-engine/thirdparty/clDNN/src/impls/ocl/register.cpp
vendored
Normal file
@ -0,0 +1,79 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
#include "register.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
// Expands to the declaration of a static object named attach_<prim> whose type is
// detail::attach_<prim>_impl. The macro is used inside register_implementations(),
// so every expansion there is a function-local static.
#define REGISTER_OCL(prim) \
|
||||
static detail::attach_##prim##_impl attach_##prim
|
||||
|
||||
// Declares one static detail::attach_<prim>_impl object per primitive listed below.
// Constructing such an object runs its constructor, which is where a primitive's
// implementation gets added to its implementation_map (attach_scale_impl in scale.cpp
// is one example). Because the objects are function-local statics, their constructors
// run the first time this function is called and are not re-run on later calls.
// NOTE(review): a primitive missing from this list is presumably never registered
// for the OCL impl type - confirm against the declarations in register.hpp.
void register_implementations() {
|
||||
REGISTER_OCL(activation);
|
||||
REGISTER_OCL(arg_max_min);
|
||||
REGISTER_OCL(average_unpooling);
|
||||
REGISTER_OCL(binary_convolution);
|
||||
REGISTER_OCL(border);
|
||||
REGISTER_OCL(broadcast);
|
||||
REGISTER_OCL(concatenation);
|
||||
REGISTER_OCL(convolution);
|
||||
REGISTER_OCL(crop);
|
||||
REGISTER_OCL(custom_gpu_primitive);
|
||||
REGISTER_OCL(deconvolution);
|
||||
REGISTER_OCL(deformable_conv);
|
||||
REGISTER_OCL(deformable_interp);
|
||||
REGISTER_OCL(depth_to_space);
|
||||
REGISTER_OCL(batch_to_space);
|
||||
REGISTER_OCL(eltwise);
|
||||
REGISTER_OCL(fully_connected);
|
||||
REGISTER_OCL(gather);
|
||||
REGISTER_OCL(gather_nd);
|
||||
REGISTER_OCL(gemm);
|
||||
REGISTER_OCL(lrn);
|
||||
REGISTER_OCL(lstm_gemm);
|
||||
REGISTER_OCL(lstm_elt);
|
||||
REGISTER_OCL(max_unpooling);
|
||||
REGISTER_OCL(mutable_data);
|
||||
REGISTER_OCL(mvn);
|
||||
REGISTER_OCL(normalize);
|
||||
REGISTER_OCL(one_hot);
|
||||
REGISTER_OCL(permute);
|
||||
REGISTER_OCL(pooling);
|
||||
REGISTER_OCL(pyramid_roi_align);
|
||||
REGISTER_OCL(quantize);
|
||||
REGISTER_OCL(reduce);
|
||||
REGISTER_OCL(region_yolo);
|
||||
REGISTER_OCL(reorder);
|
||||
REGISTER_OCL(reorg_yolo);
|
||||
REGISTER_OCL(reshape);
|
||||
REGISTER_OCL(reverse_sequence);
|
||||
REGISTER_OCL(roi_pooling);
|
||||
REGISTER_OCL(scale);
|
||||
REGISTER_OCL(scatter_update);
|
||||
REGISTER_OCL(scatter_nd_update);
|
||||
REGISTER_OCL(scatter_elements_update);
|
||||
REGISTER_OCL(select);
|
||||
REGISTER_OCL(shuffle_channels);
|
||||
REGISTER_OCL(softmax);
|
||||
REGISTER_OCL(space_to_batch);
|
||||
REGISTER_OCL(space_to_depth);
|
||||
REGISTER_OCL(strided_slice);
|
||||
REGISTER_OCL(tile);
|
||||
REGISTER_OCL(fused_conv_eltwise);
|
||||
REGISTER_OCL(lstm_dynamic_input);
|
||||
REGISTER_OCL(lstm_dynamic_timeloop);
|
||||
REGISTER_OCL(generic_layer);
|
||||
REGISTER_OCL(gather_tree);
|
||||
REGISTER_OCL(resample);
|
||||
REGISTER_OCL(grn);
|
||||
REGISTER_OCL(ctc_greedy_decoder);
|
||||
REGISTER_OCL(cum_sum);
|
||||
REGISTER_OCL(embedding_bag);
|
||||
REGISTER_OCL(extract_image_patches);
|
||||
}
|
||||
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -13,20 +13,16 @@
|
||||
#include "cldnn/primitives/border.hpp"
|
||||
#include "cldnn/primitives/broadcast.hpp"
|
||||
#include "cldnn/primitives/concatenation.hpp"
|
||||
#include "cldnn/primitives/condition.hpp"
|
||||
#include "cldnn/primitives/convolution.hpp"
|
||||
#include "cldnn/primitives/crop.hpp"
|
||||
#include "cldnn/primitives/custom_gpu_primitive.hpp"
|
||||
#include "cldnn/primitives/data.hpp"
|
||||
#include "cldnn/primitives/deconvolution.hpp"
|
||||
#include "cldnn/primitives/depth_to_space.hpp"
|
||||
#include "cldnn/primitives/detection_output.hpp"
|
||||
#include "cldnn/primitives/eltwise.hpp"
|
||||
#include "cldnn/primitives/fully_connected.hpp"
|
||||
#include "cldnn/primitives/gather.hpp"
|
||||
#include "cldnn/primitives/gather_nd.hpp"
|
||||
#include "cldnn/primitives/gemm.hpp"
|
||||
#include "cldnn/primitives/input_layout.hpp"
|
||||
#include "cldnn/primitives/lrn.hpp"
|
||||
#include "cldnn/primitives/lstm.hpp"
|
||||
#include "cldnn/primitives/lstm_dynamic.hpp"
|
||||
@ -37,8 +33,6 @@
|
||||
#include "cldnn/primitives/one_hot.hpp"
|
||||
#include "cldnn/primitives/permute.hpp"
|
||||
#include "cldnn/primitives/pooling.hpp"
|
||||
#include "cldnn/primitives/prior_box.hpp"
|
||||
#include "cldnn/primitives/proposal.hpp"
|
||||
#include "cldnn/primitives/pyramid_roi_align.hpp"
|
||||
#include "cldnn/primitives/quantize.hpp"
|
||||
#include "cldnn/primitives/reduce.hpp"
|
||||
@ -63,97 +57,88 @@
|
||||
#include "cldnn/primitives/fused_conv_eltwise.hpp"
|
||||
#include "cldnn/primitives/lstm_dynamic_input.hpp"
|
||||
#include "cldnn/primitives/lstm_dynamic_timeloop.hpp"
|
||||
#include "cldnn/primitives/non_max_suppression.hpp"
|
||||
#include "cldnn/primitives/grn.hpp"
|
||||
#include "cldnn/primitives/ctc_greedy_decoder.hpp"
|
||||
#include "cldnn/primitives/loop.hpp"
|
||||
#include "generic_layer.hpp"
|
||||
|
||||
|
||||
namespace cldnn { namespace gpu {
|
||||
void register_implementations_gpu();
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
void register_implementations();
|
||||
|
||||
namespace detail {
|
||||
|
||||
#define REGISTER_GPU(prim) \
|
||||
struct attach_##prim##_gpu { \
|
||||
attach_##prim##_gpu(); \
|
||||
#define REGISTER_OCL(prim) \
|
||||
struct attach_##prim##_impl { \
|
||||
attach_##prim##_impl(); \
|
||||
}
|
||||
|
||||
REGISTER_GPU(activation);
|
||||
REGISTER_GPU(arg_max_min);
|
||||
REGISTER_GPU(average_unpooling);
|
||||
REGISTER_GPU(batch_to_space);
|
||||
REGISTER_GPU(binary_convolution);
|
||||
REGISTER_GPU(border);
|
||||
REGISTER_GPU(broadcast);
|
||||
REGISTER_GPU(concatenation);
|
||||
REGISTER_GPU(condition);
|
||||
REGISTER_GPU(convolution);
|
||||
REGISTER_GPU(crop);
|
||||
REGISTER_GPU(custom_gpu_primitive);
|
||||
REGISTER_GPU(data);
|
||||
REGISTER_GPU(deconvolution);
|
||||
REGISTER_GPU(deformable_conv);
|
||||
REGISTER_GPU(deformable_interp);
|
||||
REGISTER_GPU(depth_to_space);
|
||||
REGISTER_GPU(detection_output);
|
||||
REGISTER_GPU(eltwise);
|
||||
REGISTER_GPU(embed);
|
||||
REGISTER_GPU(fully_connected);
|
||||
REGISTER_GPU(gather);
|
||||
REGISTER_GPU(gather_nd);
|
||||
REGISTER_GPU(gemm);
|
||||
REGISTER_GPU(input_layout);
|
||||
REGISTER_GPU(lookup_table);
|
||||
REGISTER_GPU(lrn);
|
||||
REGISTER_GPU(lstm_gemm);
|
||||
REGISTER_GPU(lstm_elt);
|
||||
REGISTER_GPU(max_unpooling);
|
||||
REGISTER_GPU(mutable_data);
|
||||
REGISTER_GPU(mvn);
|
||||
REGISTER_GPU(normalize);
|
||||
REGISTER_GPU(one_hot);
|
||||
REGISTER_GPU(permute);
|
||||
REGISTER_GPU(pooling);
|
||||
REGISTER_GPU(prior_box);
|
||||
REGISTER_GPU(proposal);
|
||||
REGISTER_GPU(pyramid_roi_align);
|
||||
REGISTER_GPU(quantize);
|
||||
REGISTER_GPU(reduce);
|
||||
REGISTER_GPU(region_yolo);
|
||||
REGISTER_GPU(reorder);
|
||||
REGISTER_GPU(reorg_yolo);
|
||||
REGISTER_GPU(reshape);
|
||||
REGISTER_GPU(reverse_sequence);
|
||||
REGISTER_GPU(roi_pooling);
|
||||
REGISTER_GPU(scale);
|
||||
REGISTER_GPU(scatter_update);
|
||||
REGISTER_GPU(scatter_elements_update);
|
||||
REGISTER_GPU(scatter_nd_update);
|
||||
REGISTER_GPU(select);
|
||||
REGISTER_GPU(shuffle_channels);
|
||||
REGISTER_GPU(softmax);
|
||||
REGISTER_GPU(space_to_batch);
|
||||
REGISTER_GPU(space_to_depth);
|
||||
REGISTER_GPU(strided_slice);
|
||||
REGISTER_GPU(tile);
|
||||
REGISTER_GPU(fused_conv_eltwise);
|
||||
REGISTER_GPU(lstm_dynamic_input);
|
||||
REGISTER_GPU(lstm_dynamic_timeloop);
|
||||
REGISTER_GPU(generic_layer);
|
||||
REGISTER_GPU(gather_tree);
|
||||
REGISTER_GPU(resample);
|
||||
REGISTER_GPU(non_max_suppression);
|
||||
REGISTER_GPU(grn);
|
||||
REGISTER_GPU(ctc_greedy_decoder);
|
||||
REGISTER_GPU(cum_sum);
|
||||
REGISTER_GPU(embedding_bag);
|
||||
REGISTER_GPU(extract_image_patches);
|
||||
REGISTER_GPU(loop);
|
||||
REGISTER_OCL(activation);
|
||||
REGISTER_OCL(arg_max_min);
|
||||
REGISTER_OCL(average_unpooling);
|
||||
REGISTER_OCL(batch_to_space);
|
||||
REGISTER_OCL(binary_convolution);
|
||||
REGISTER_OCL(border);
|
||||
REGISTER_OCL(broadcast);
|
||||
REGISTER_OCL(concatenation);
|
||||
REGISTER_OCL(convolution);
|
||||
REGISTER_OCL(crop);
|
||||
REGISTER_OCL(custom_gpu_primitive);
|
||||
REGISTER_OCL(data);
|
||||
REGISTER_OCL(deconvolution);
|
||||
REGISTER_OCL(deformable_conv);
|
||||
REGISTER_OCL(deformable_interp);
|
||||
REGISTER_OCL(depth_to_space);
|
||||
REGISTER_OCL(eltwise);
|
||||
REGISTER_OCL(embed);
|
||||
REGISTER_OCL(fully_connected);
|
||||
REGISTER_OCL(gather);
|
||||
REGISTER_OCL(gather_nd);
|
||||
REGISTER_OCL(gemm);
|
||||
REGISTER_OCL(lrn);
|
||||
REGISTER_OCL(lstm_gemm);
|
||||
REGISTER_OCL(lstm_elt);
|
||||
REGISTER_OCL(max_unpooling);
|
||||
REGISTER_OCL(mutable_data);
|
||||
REGISTER_OCL(mvn);
|
||||
REGISTER_OCL(normalize);
|
||||
REGISTER_OCL(one_hot);
|
||||
REGISTER_OCL(permute);
|
||||
REGISTER_OCL(pooling);
|
||||
REGISTER_OCL(pyramid_roi_align);
|
||||
REGISTER_OCL(quantize);
|
||||
REGISTER_OCL(reduce);
|
||||
REGISTER_OCL(region_yolo);
|
||||
REGISTER_OCL(reorder);
|
||||
REGISTER_OCL(reorg_yolo);
|
||||
REGISTER_OCL(reshape);
|
||||
REGISTER_OCL(reverse_sequence);
|
||||
REGISTER_OCL(roi_pooling);
|
||||
REGISTER_OCL(scale);
|
||||
REGISTER_OCL(scatter_update);
|
||||
REGISTER_OCL(scatter_elements_update);
|
||||
REGISTER_OCL(scatter_nd_update);
|
||||
REGISTER_OCL(select);
|
||||
REGISTER_OCL(shuffle_channels);
|
||||
REGISTER_OCL(softmax);
|
||||
REGISTER_OCL(space_to_batch);
|
||||
REGISTER_OCL(space_to_depth);
|
||||
REGISTER_OCL(strided_slice);
|
||||
REGISTER_OCL(tile);
|
||||
REGISTER_OCL(fused_conv_eltwise);
|
||||
REGISTER_OCL(lstm_dynamic_input);
|
||||
REGISTER_OCL(lstm_dynamic_timeloop);
|
||||
REGISTER_OCL(generic_layer);
|
||||
REGISTER_OCL(gather_tree);
|
||||
REGISTER_OCL(resample);
|
||||
REGISTER_OCL(grn);
|
||||
REGISTER_OCL(ctc_greedy_decoder);
|
||||
REGISTER_OCL(cum_sum);
|
||||
REGISTER_OCL(embedding_bag);
|
||||
REGISTER_OCL(extract_image_patches);
|
||||
|
||||
#undef REGISTER_GPU
|
||||
#undef REGISTER_OCL
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,22 +3,22 @@
|
||||
//
|
||||
|
||||
#include "reorder_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "reorder/reorder_kernel_selector.h"
|
||||
#include "reorder/reorder_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct reorder_gpu : typed_primitive_gpu_impl<reorder> {
|
||||
using parent = typed_primitive_gpu_impl<reorder>;
|
||||
struct reorder_impl : typed_primitive_impl_ocl<reorder> {
|
||||
using parent = typed_primitive_impl_ocl<reorder>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<reorder_gpu>(*this);
|
||||
return make_unique<reorder_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -108,7 +108,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto reorder = new reorder_gpu(arg, best_kernels[0]);
|
||||
auto reorder = new reorder_impl(arg, best_kernels[0]);
|
||||
|
||||
return reorder;
|
||||
}
|
||||
@ -116,10 +116,10 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_reorder_gpu::attach_reorder_gpu() {
|
||||
implementation_map<reorder>::add({{engine_types::ocl, reorder_gpu::create}});
|
||||
attach_reorder_impl::attach_reorder_impl() {
|
||||
implementation_map<reorder>::add(impl_types::ocl, reorder_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,22 +3,22 @@
|
||||
//
|
||||
|
||||
#include "reorg_yolo_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "reorg_yolo/reorg_yolo_kernel_selector.h"
|
||||
#include "reorg_yolo/reorg_yolo_kernel_ref.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
|
||||
using parent = typed_primitive_gpu_impl<reorg_yolo>;
|
||||
struct reorg_yolo_impl : typed_primitive_impl_ocl<reorg_yolo> {
|
||||
using parent = typed_primitive_impl_ocl<reorg_yolo>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<reorg_yolo_gpu>(*this);
|
||||
return make_unique<reorg_yolo_impl>(*this);
|
||||
}
|
||||
|
||||
static primitive_impl* create(const reorg_yolo_node& arg) {
|
||||
@ -38,7 +38,7 @@ struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto reorg_yolo_node = new reorg_yolo_gpu(arg, best_kernels[0]);
|
||||
auto reorg_yolo_node = new reorg_yolo_impl(arg, best_kernels[0]);
|
||||
|
||||
return reorg_yolo_node;
|
||||
}
|
||||
@ -46,16 +46,17 @@ struct reorg_yolo_gpu : typed_primitive_gpu_impl<reorg_yolo> {
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_reorg_yolo_gpu::attach_reorg_yolo_gpu() {
|
||||
auto val_fw = reorg_yolo_gpu::create;
|
||||
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
|
||||
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
|
||||
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
|
||||
implementation_map<reorg_yolo>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
|
||||
attach_reorg_yolo_impl::attach_reorg_yolo_impl() {
|
||||
implementation_map<reorg_yolo>::add(impl_types::ocl, reorg_yolo_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,15 +3,15 @@
|
||||
//
|
||||
|
||||
#include "resample_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "kernel_selector/core/actual_kernels/resample/resample_kernel_selector.h"
|
||||
#include "kernel_selector/core/actual_kernels/resample/resample_kernel_base.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
namespace {
|
||||
inline kernel_selector::sample_type convert_to_sample_type(resample_type type) {
|
||||
@ -96,12 +96,12 @@ inline kernel_selector::interpolate_axis convert_axis(resample::resample_axis ax
|
||||
}
|
||||
} // namespace
|
||||
|
||||
struct resample_gpu : typed_primitive_gpu_impl<resample> {
|
||||
using parent = typed_primitive_gpu_impl<resample>;
|
||||
struct resample_impl : typed_primitive_impl_ocl<resample> {
|
||||
using parent = typed_primitive_impl_ocl<resample>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<resample_gpu>(*this);
|
||||
return make_unique<resample_impl>(*this);
|
||||
}
|
||||
|
||||
static primitive_impl* create(const resample_node& arg) {
|
||||
@ -135,7 +135,7 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto resample = new resample_gpu(arg, best_kernels[0]);
|
||||
auto resample = new resample_impl(arg, best_kernels[0]);
|
||||
|
||||
return resample;
|
||||
}
|
||||
@ -143,31 +143,32 @@ struct resample_gpu : typed_primitive_gpu_impl<resample> {
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_resample_gpu::attach_resample_gpu() {
|
||||
implementation_map<resample>::add(
|
||||
{{std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::fs_b_yx_fsv32), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv16), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv16), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::bfzyx), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::bfzyx), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv16), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv16), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f32, format::b_fs_yx_fsv4), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::f16, format::b_fs_yx_fsv4), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), resample_gpu::create},
|
||||
{std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), resample_gpu::create}});
|
||||
attach_resample_impl::attach_resample_impl() {
|
||||
implementation_map<resample>::add(impl_types::ocl, resample_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfzyx),
|
||||
std::make_tuple(data_types::i8, format::bfzyx),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,28 +3,28 @@
|
||||
//
|
||||
|
||||
#include "reshape_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "reshape/reshape_kernel_ref.h"
|
||||
#include "reshape/reshape_kernel_selector.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
struct reshape_gpu : public typed_primitive_gpu_impl<reshape> {
|
||||
using parent = typed_primitive_gpu_impl<reshape>;
|
||||
struct reshape_impl : public typed_primitive_impl_ocl<reshape> {
|
||||
using parent = typed_primitive_impl_ocl<reshape>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<reshape_gpu>(*this);
|
||||
return make_unique<reshape_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
static primitive_impl* create(reshape_node const& arg) {
|
||||
if (arg.can_be_optimized()) {
|
||||
return new reshape_gpu(arg, {});
|
||||
return new reshape_impl(arg, {});
|
||||
}
|
||||
|
||||
auto reorder_params = get_default_params<kernel_selector::reshape_params>(arg);
|
||||
@ -39,7 +39,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto reshape = new reshape_gpu(arg, best_kernels[0]);
|
||||
auto reshape = new reshape_impl(arg, best_kernels[0]);
|
||||
|
||||
return reshape;
|
||||
}
|
||||
@ -47,10 +47,10 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_reshape_gpu::attach_reshape_gpu() {
|
||||
implementation_map<reshape>::add({{engine_types::ocl, reshape_gpu::create}});
|
||||
attach_reshape_impl::attach_reshape_impl() {
|
||||
implementation_map<reshape>::add(impl_types::ocl, reshape_impl::create, {});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "reverse_sequence_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "reverse_sequence/reverse_sequence_kernel_selector.h"
|
||||
#include "reverse_sequence/reverse_sequence_kernel_ref.h"
|
||||
@ -13,13 +13,13 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
struct reverse_sequence_gpu : typed_primitive_gpu_impl<reverse_sequence> {
|
||||
using parent = typed_primitive_gpu_impl<reverse_sequence>;
|
||||
namespace ocl {
|
||||
struct reverse_sequence_impl : typed_primitive_impl_ocl<reverse_sequence> {
|
||||
using parent = typed_primitive_impl_ocl<reverse_sequence>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<reverse_sequence_gpu>(*this);
|
||||
return make_unique<reverse_sequence_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -41,7 +41,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto reverse_sequence = new reverse_sequence_gpu(arg, best_kernels[0]);
|
||||
auto reverse_sequence = new reverse_sequence_impl(arg, best_kernels[0]);
|
||||
|
||||
return reverse_sequence;
|
||||
}
|
||||
@ -49,15 +49,16 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_reverse_sequence_gpu::attach_reverse_sequence_gpu() {
|
||||
auto val_fw = reverse_sequence_gpu::create;
|
||||
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
|
||||
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
|
||||
implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
|
||||
attach_reverse_sequence_impl::attach_reverse_sequence_impl() {
|
||||
implementation_map<reverse_sequence>::add(impl_types::ocl, reverse_sequence_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,15 +3,15 @@
|
||||
//
|
||||
|
||||
#include "roi_pooling_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "roi_pooling/roi_pooling_kernel_selector.h"
|
||||
#include "roi_pooling/roi_pooling_kernel_ref.h"
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
|
||||
namespace {
|
||||
kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
|
||||
@ -33,12 +33,12 @@ kernel_selector::pool_type cldnn_2_pool_type(pooling_mode mode) {
|
||||
}
|
||||
} // namespace
|
||||
|
||||
struct roi_pooling_gpu : typed_primitive_gpu_impl<roi_pooling> {
|
||||
using parent = typed_primitive_gpu_impl<roi_pooling>;
|
||||
struct roi_pooling_impl : typed_primitive_impl_ocl<roi_pooling> {
|
||||
using parent = typed_primitive_impl_ocl<roi_pooling>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<roi_pooling_gpu>(*this);
|
||||
return make_unique<roi_pooling_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
@ -108,7 +108,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto roi_pool = new roi_pooling_gpu(arg, best_kernels[0]);
|
||||
auto roi_pool = new roi_pooling_impl(arg, best_kernels[0]);
|
||||
|
||||
return roi_pool;
|
||||
}
|
||||
@ -116,13 +116,13 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_roi_pooling_gpu::attach_roi_pooling_gpu() {
|
||||
implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx),
|
||||
roi_pooling_gpu::create);
|
||||
implementation_map<roi_pooling>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
|
||||
roi_pooling_gpu::create);
|
||||
attach_roi_pooling_impl::attach_roi_pooling_impl() {
|
||||
implementation_map<roi_pooling>::add(impl_types::ocl, roi_pooling_impl::create, {
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
136
inference-engine/thirdparty/clDNN/src/impls/ocl/scale.cpp
vendored
Normal file
136
inference-engine/thirdparty/clDNN/src/impls/ocl/scale.cpp
vendored
Normal file
@ -0,0 +1,136 @@
|
||||
// Copyright (C) 2018-2021 Intel Corporation
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
#include "scale_inst.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "eltwise/eltwise_kernel_selector.h"
|
||||
#include "eltwise/eltwise_kernel_base.h"
|
||||
#include "cldnn/runtime/error_handler.hpp"
|
||||
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace ocl {
|
||||
|
||||
// OCL implementation of the scale primitive. It does not use a scale-specific kernel:
// create() describes the work to the eltwise kernel selector as a multiply of two
// buffers, followed by an add of a third buffer when the node has a bias term.
struct scale_impl : typed_primitive_impl_ocl<scale> {
|
||||
using parent = typed_primitive_impl_ocl<scale>;
|
||||
// Inherit the parent's constructors; create() relies on the (node, kernel) one.
using parent::parent;
|
||||
|
||||
// Returns a copy of this implementation, made with scale_impl's copy constructor.
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<scale_impl>(*this);
|
||||
}
|
||||
|
||||
protected:
|
||||
// Builds the kernel arguments for one execution: starts from the parent's arguments,
// then overwrites inputs with {input, scale}, sets the output, and appends the bias
// memory when the node reports a bias term. The input order here (input, scale, bias)
// matches the Buffer(0)/Buffer(1)/Buffer(2) indices used in create().
kernel_arguments_data get_arguments(typed_primitive_inst<scale>& instance, int32_t split) const override {
|
||||
kernel_arguments_data args = parent::get_arguments(instance, split);
|
||||
args.inputs = {instance.input_memory_ptr(), instance.scale_memory()};
|
||||
args.output = instance.output_memory_ptr();
|
||||
|
||||
if (_outer.bias_term()) {
|
||||
args.inputs.push_back(instance.bias_memory());
|
||||
}
|
||||
return args;
|
||||
}
|
||||
|
||||
public:
|
||||
// Factory registered with implementation_map<scale>. Fills eltwise parameters for the
// given node, asks the eltwise kernel selector for the best kernels and wraps the first
// one in a new scale_impl. The result is allocated with `new` and returned as a raw
// pointer. NOTE(review): ownership is presumably taken by the caller - confirm.
static primitive_impl* create(const scale_node& arg) {
|
||||
auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
|
||||
auto ew_optional_params =
|
||||
get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
|
||||
|
||||
// The scale tensor is appended after whatever get_default_params() already added.
// NOTE(review): assumes the primary input is already at index 0, making scale index 1.
ew_params.inputs.push_back(convert_data_tensor(arg.scale_in().get_output_layout()));
|
||||
|
||||
// Operation 0: Buffer(0) MUL Buffer(1).
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Buffer(0),
|
||||
kernel_selector::eltwise_params::InputType::Buffer(1)},
|
||||
kernel_selector::eltwise_mode::MUL});
|
||||
|
||||
if (arg.bias_term()) {
|
||||
ew_params.inputs.push_back(convert_data_tensor(arg.bias().get_output_layout()));
|
||||
// Operation 1: Intermediate(0) ADD Buffer(2).
// NOTE(review): Intermediate(0) presumably refers to the result of operation 0 - confirm.
ew_params.operations.push_back({{kernel_selector::eltwise_params::InputType::Intermediate(0),
|
||||
kernel_selector::eltwise_params::InputType::Buffer(2)},
|
||||
kernel_selector::eltwise_mode::ADD});
|
||||
}
|
||||
|
||||
ew_params.layoutBased = true;
|
||||
|
||||
auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
|
||||
auto best_kernels = kernel_selector.GetBestKernels(ew_params, ew_optional_params);
|
||||
|
||||
// NOTE(review): presumably raises an error for this node id when no kernel was found,
// which is what makes the best_kernels[0] access below safe - confirm the macro's behavior.
CLDNN_ERROR_BOOL(arg.id(),
|
||||
"Best_kernel.empty()",
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto scale = new scale_impl(arg, best_kernels[0]);
|
||||
|
||||
return scale;
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Constructor of the registration helper for the scale primitive. It makes a single
// implementation_map<scale>::add() call that passes impl_types::ocl, the
// scale_impl::create factory and the list of (data type, format) tuples below.
// NOTE(review): the tuples are presumably the combinations for which this OCL
// implementation may be selected - confirm against implementation_map.hpp.
attach_scale_impl::attach_scale_impl() {
|
||||
implementation_map<scale>::add(impl_types::ocl, scale_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::yxfb),
|
||||
std::make_tuple(data_types::f16, format::yxfb),
|
||||
std::make_tuple(data_types::i32, format::yxfb),
|
||||
std::make_tuple(data_types::f32, format::byxf),
|
||||
std::make_tuple(data_types::f16, format::byxf),
|
||||
std::make_tuple(data_types::i32, format::byxf),
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::u8, format::bfyx),
|
||||
std::make_tuple(data_types::i8, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i32, format::bfzyx),
|
||||
std::make_tuple(data_types::u8, format::bfzyx),
|
||||
std::make_tuple(data_types::i8, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
||||
std::make_tuple(data_types::i32, format::bfwzyx),
|
||||
std::make_tuple(data_types::u8, format::bfwzyx),
|
||||
std::make_tuple(data_types::i8, format::bfwzyx),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i32, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv16),
|
||||
std::make_tuple(data_types::f32, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_zyx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::fs_b_yx_fsv32),
|
||||
std::make_tuple(data_types::i32, format::fs_b_yx_fsv32),
|
||||
std::make_tuple(data_types::f32, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::f16, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::i32, format::bs_fs_yx_bsv16_fsv16),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::i32, format::b_fs_yx_fsv4),
|
||||
std::make_tuple(data_types::u8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::i8, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::f16, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::f32, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::i32, format::b_fs_yx_fsv32),
|
||||
std::make_tuple(data_types::u8, format::b_fs_zyx_fsv32),
|
||||
std::make_tuple(data_types::i8, format::b_fs_zyx_fsv32),
|
||||
std::make_tuple(data_types::f16, format::b_fs_zyx_fsv32),
|
||||
std::make_tuple(data_types::f32, format::b_fs_zyx_fsv32),
|
||||
std::make_tuple(data_types::i32, format::b_fs_zyx_fsv32),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
@ -3,8 +3,8 @@
|
||||
//
|
||||
|
||||
#include "scatter_elements_update_inst.h"
|
||||
#include "primitive_gpu_base.h"
|
||||
#include "implementation_map.h"
|
||||
#include "primitive_base.hpp"
|
||||
#include "impls/implementation_map.hpp"
|
||||
#include "kernel_selector_helper.h"
|
||||
#include "scatter_update/scatter_elements_update_kernel_selector.h"
|
||||
#include "scatter_update/scatter_elements_update_kernel_ref.h"
|
||||
@ -13,7 +13,7 @@
|
||||
using namespace cldnn;
|
||||
|
||||
namespace cldnn {
|
||||
namespace gpu {
|
||||
namespace ocl {
|
||||
kernel_selector::scatter_update_axis convert_axis(scatter_elements_update::scatter_elements_update_axis axis, const scatter_elements_update_node& arg) {
|
||||
switch (axis) {
|
||||
case scatter_elements_update::along_x:
|
||||
@ -34,12 +34,12 @@ kernel_selector::scatter_update_axis convert_axis(scatter_elements_update::scatt
|
||||
return kernel_selector::scatter_update_axis::X;
|
||||
}
|
||||
|
||||
struct scatter_elements_update_gpu : typed_primitive_gpu_impl<scatter_elements_update> {
|
||||
using parent = typed_primitive_gpu_impl<scatter_elements_update>;
|
||||
struct scatter_elements_update_impl : typed_primitive_impl_ocl<scatter_elements_update> {
|
||||
using parent = typed_primitive_impl_ocl<scatter_elements_update>;
|
||||
using parent::parent;
|
||||
|
||||
std::unique_ptr<primitive_impl> clone() const override {
|
||||
return make_unique<scatter_elements_update_gpu>(*this);
|
||||
return make_unique<scatter_elements_update_impl>(*this);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -61,7 +61,7 @@ public:
|
||||
best_kernels.empty(),
|
||||
"Cannot find a proper kernel with this arguments");
|
||||
|
||||
auto scatter_elements_update = new scatter_elements_update_gpu(arg, best_kernels[0]);
|
||||
auto scatter_elements_update = new scatter_elements_update_impl(arg, best_kernels[0]);
|
||||
|
||||
return scatter_elements_update;
|
||||
}
|
||||
@ -69,21 +69,20 @@ public:
|
||||
|
||||
namespace detail {
|
||||
|
||||
attach_scatter_elements_update_gpu::attach_scatter_elements_update_gpu() {
|
||||
auto val_fw = scatter_elements_update_gpu::create;
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
|
||||
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfzyx), val_fw);
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfzyx), val_fw);
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfzyx), val_fw);
|
||||
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfwzyx), val_fw);
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfwzyx), val_fw);
|
||||
implementation_map<scatter_elements_update>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfwzyx), val_fw);
|
||||
attach_scatter_elements_update_impl::attach_scatter_elements_update_impl() {
|
||||
implementation_map<scatter_elements_update>::add(impl_types::ocl, scatter_elements_update_impl::create, {
|
||||
std::make_tuple(data_types::f32, format::bfyx),
|
||||
std::make_tuple(data_types::f16, format::bfyx),
|
||||
std::make_tuple(data_types::i32, format::bfyx),
|
||||
std::make_tuple(data_types::f32, format::bfzyx),
|
||||
std::make_tuple(data_types::f16, format::bfzyx),
|
||||
std::make_tuple(data_types::i32, format::bfzyx),
|
||||
std::make_tuple(data_types::f32, format::bfwzyx),
|
||||
std::make_tuple(data_types::f16, format::bfwzyx),
|
||||
std::make_tuple(data_types::i32, format::bfwzyx),
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
} // namespace gpu
|
||||
} // namespace ocl
|
||||
} // namespace cldnn
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user